{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# -*- coding: utf-8 -*-\n",
    "\"\"\"\n",
    "@author: lrhao\n",
    "@software: jupyter\n",
    "@file: baseline.ipynb\n",
    "@time: 2020-12-11\n",
    "@description：\n",
    "\"\"\"\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from tqdm import tqdm\n",
    "import os\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((40000, 21), (15000, 20), (15000, 2))"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train = pd.read_csv('../公积金逾期预测-数据/train.csv')\n",
    "test = pd.read_csv('../公积金逾期预测-数据/test.csv')\n",
    "submit = pd.read_csv('../公积金逾期预测-数据/submit.csv')\n",
    "train.shape, test.shape, submit.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>XINGBIE</th>\n",
       "      <th>CSNY</th>\n",
       "      <th>HYZK</th>\n",
       "      <th>ZHIYE</th>\n",
       "      <th>ZHICHEN</th>\n",
       "      <th>ZHIWU</th>\n",
       "      <th>XUELI</th>\n",
       "      <th>DWJJLX</th>\n",
       "      <th>DWSSHY</th>\n",
       "      <th>...</th>\n",
       "      <th>GRZHZT</th>\n",
       "      <th>GRZHYE</th>\n",
       "      <th>GRZHSNJZYE</th>\n",
       "      <th>GRZHDNGJYE</th>\n",
       "      <th>GRYJCE</th>\n",
       "      <th>DWYJCE</th>\n",
       "      <th>DKFFE</th>\n",
       "      <th>DKYE</th>\n",
       "      <th>DKLL</th>\n",
       "      <th>label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>train_0</td>\n",
       "      <td>1</td>\n",
       "      <td>1038672000</td>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>0</td>\n",
       "      <td>99</td>\n",
       "      <td>150</td>\n",
       "      <td>12</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>3223.515</td>\n",
       "      <td>801.310</td>\n",
       "      <td>837.000</td>\n",
       "      <td>312.00</td>\n",
       "      <td>312.00</td>\n",
       "      <td>175237</td>\n",
       "      <td>154112.935</td>\n",
       "      <td>2.708</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>train_1</td>\n",
       "      <td>2</td>\n",
       "      <td>504892800</td>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>0</td>\n",
       "      <td>99</td>\n",
       "      <td>110</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>18055.195</td>\n",
       "      <td>53213.220</td>\n",
       "      <td>1065.200</td>\n",
       "      <td>795.84</td>\n",
       "      <td>795.84</td>\n",
       "      <td>300237</td>\n",
       "      <td>298252.945</td>\n",
       "      <td>2.979</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>train_2</td>\n",
       "      <td>1</td>\n",
       "      <td>736185600</td>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>0</td>\n",
       "      <td>99</td>\n",
       "      <td>150</td>\n",
       "      <td>9</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>27426.600</td>\n",
       "      <td>13963.140</td>\n",
       "      <td>7230.020</td>\n",
       "      <td>1444.20</td>\n",
       "      <td>1444.20</td>\n",
       "      <td>150237</td>\n",
       "      <td>147339.130</td>\n",
       "      <td>2.708</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>train_3</td>\n",
       "      <td>1</td>\n",
       "      <td>428515200</td>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>0</td>\n",
       "      <td>99</td>\n",
       "      <td>150</td>\n",
       "      <td>7</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>111871.130</td>\n",
       "      <td>99701.265</td>\n",
       "      <td>2271.295</td>\n",
       "      <td>1417.14</td>\n",
       "      <td>1417.14</td>\n",
       "      <td>350237</td>\n",
       "      <td>300653.780</td>\n",
       "      <td>2.708</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>train_4</td>\n",
       "      <td>2</td>\n",
       "      <td>544204800</td>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>0</td>\n",
       "      <td>99</td>\n",
       "      <td>900</td>\n",
       "      <td>14</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>237.000</td>\n",
       "      <td>11028.875</td>\n",
       "      <td>35.780</td>\n",
       "      <td>325.50</td>\n",
       "      <td>325.50</td>\n",
       "      <td>150237</td>\n",
       "      <td>145185.010</td>\n",
       "      <td>2.708</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        id  XINGBIE        CSNY  HYZK  ZHIYE  ZHICHEN  ZHIWU  XUELI  DWJJLX  \\\n",
       "0  train_0        1  1038672000    90     90      999      0     99     150   \n",
       "1  train_1        2   504892800    90     90      999      0     99     110   \n",
       "2  train_2        1   736185600    90     90      999      0     99     150   \n",
       "3  train_3        1   428515200    90     90      999      0     99     150   \n",
       "4  train_4        2   544204800    90     90      999      0     99     900   \n",
       "\n",
       "   DWSSHY  ...  GRZHZT      GRZHYE  GRZHSNJZYE  GRZHDNGJYE   GRYJCE   DWYJCE  \\\n",
       "0      12  ...       1    3223.515     801.310     837.000   312.00   312.00   \n",
       "1       0  ...       1   18055.195   53213.220    1065.200   795.84   795.84   \n",
       "2       9  ...       1   27426.600   13963.140    7230.020  1444.20  1444.20   \n",
       "3       7  ...       1  111871.130   99701.265    2271.295  1417.14  1417.14   \n",
       "4      14  ...       1     237.000   11028.875      35.780   325.50   325.50   \n",
       "\n",
       "    DKFFE        DKYE   DKLL  label  \n",
       "0  175237  154112.935  2.708      0  \n",
       "1  300237  298252.945  2.979      0  \n",
       "2  150237  147339.130  2.708      0  \n",
       "3  350237  300653.780  2.708      0  \n",
       "4  150237  145185.010  2.708      0  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>HYZK</th>\n",
       "      <th>ZHIYE</th>\n",
       "      <th>ZHICHEN</th>\n",
       "      <th>DWJJLX</th>\n",
       "      <th>DWSSHY</th>\n",
       "      <th>GRZHZT</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>150</td>\n",
       "      <td>12</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>110</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>150</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>150</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>900</td>\n",
       "      <td>14</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39995</th>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>110</td>\n",
       "      <td>16</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39996</th>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>110</td>\n",
       "      <td>14</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39997</th>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>143</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39998</th>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>150</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39999</th>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>110</td>\n",
       "      <td>14</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>40000 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       HYZK  ZHIYE  ZHICHEN  DWJJLX  DWSSHY  GRZHZT\n",
       "0        90     90      999     150      12       1\n",
       "1        90     90      999     110       0       1\n",
       "2        90     90      999     150       9       1\n",
       "3        90     90      999     150       7       1\n",
       "4        90     90      999     900      14       1\n",
       "...     ...    ...      ...     ...     ...     ...\n",
       "39995    90     90      999     110      16       1\n",
       "39996    90     90      999     110      14       1\n",
       "39997    90     90      999     143       9       1\n",
       "39998    90     90      999     150       6       1\n",
       "39999    90     90      999     110      14       1\n",
       "\n",
       "[40000 rows x 6 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cate_2_cols = ['XINGBIE', 'ZHIWU', 'XUELI']\n",
    "cate_cols = ['HYZK', 'ZHIYE', 'ZHICHEN', 'DWJJLX', 'DWSSHY', 'GRZHZT']\n",
    "train[cate_cols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>GRJCJS</th>\n",
       "      <th>GRZHYE</th>\n",
       "      <th>GRZHSNJZYE</th>\n",
       "      <th>GRZHDNGJYE</th>\n",
       "      <th>GRYJCE</th>\n",
       "      <th>DWYJCE</th>\n",
       "      <th>DKFFE</th>\n",
       "      <th>DKYE</th>\n",
       "      <th>DKLL</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1737.0</td>\n",
       "      <td>3223.515</td>\n",
       "      <td>801.310</td>\n",
       "      <td>837.000</td>\n",
       "      <td>312.00</td>\n",
       "      <td>312.00</td>\n",
       "      <td>175237</td>\n",
       "      <td>154112.935</td>\n",
       "      <td>2.708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4894.0</td>\n",
       "      <td>18055.195</td>\n",
       "      <td>53213.220</td>\n",
       "      <td>1065.200</td>\n",
       "      <td>795.84</td>\n",
       "      <td>795.84</td>\n",
       "      <td>300237</td>\n",
       "      <td>298252.945</td>\n",
       "      <td>2.979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10297.0</td>\n",
       "      <td>27426.600</td>\n",
       "      <td>13963.140</td>\n",
       "      <td>7230.020</td>\n",
       "      <td>1444.20</td>\n",
       "      <td>1444.20</td>\n",
       "      <td>150237</td>\n",
       "      <td>147339.130</td>\n",
       "      <td>2.708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10071.5</td>\n",
       "      <td>111871.130</td>\n",
       "      <td>99701.265</td>\n",
       "      <td>2271.295</td>\n",
       "      <td>1417.14</td>\n",
       "      <td>1417.14</td>\n",
       "      <td>350237</td>\n",
       "      <td>300653.780</td>\n",
       "      <td>2.708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2007.0</td>\n",
       "      <td>237.000</td>\n",
       "      <td>11028.875</td>\n",
       "      <td>35.780</td>\n",
       "      <td>325.50</td>\n",
       "      <td>325.50</td>\n",
       "      <td>150237</td>\n",
       "      <td>145185.010</td>\n",
       "      <td>2.708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39995</th>\n",
       "      <td>3185.0</td>\n",
       "      <td>82865.575</td>\n",
       "      <td>65304.370</td>\n",
       "      <td>4026.600</td>\n",
       "      <td>590.76</td>\n",
       "      <td>590.76</td>\n",
       "      <td>150237</td>\n",
       "      <td>117816.680</td>\n",
       "      <td>2.708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39996</th>\n",
       "      <td>1660.5</td>\n",
       "      <td>32811.585</td>\n",
       "      <td>27065.150</td>\n",
       "      <td>1603.560</td>\n",
       "      <td>407.82</td>\n",
       "      <td>407.82</td>\n",
       "      <td>300237</td>\n",
       "      <td>267615.065</td>\n",
       "      <td>2.979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39997</th>\n",
       "      <td>3923.0</td>\n",
       "      <td>8875.695</td>\n",
       "      <td>1309.890</td>\n",
       "      <td>3185.800</td>\n",
       "      <td>605.60</td>\n",
       "      <td>605.60</td>\n",
       "      <td>175237</td>\n",
       "      <td>237.000</td>\n",
       "      <td>2.708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39998</th>\n",
       "      <td>3527.0</td>\n",
       "      <td>2079.400</td>\n",
       "      <td>252.875</td>\n",
       "      <td>1614.490</td>\n",
       "      <td>467.30</td>\n",
       "      <td>467.30</td>\n",
       "      <td>125237</td>\n",
       "      <td>113068.900</td>\n",
       "      <td>2.708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39999</th>\n",
       "      <td>4934.5</td>\n",
       "      <td>26210.635</td>\n",
       "      <td>27266.080</td>\n",
       "      <td>-4994.610</td>\n",
       "      <td>800.70</td>\n",
       "      <td>800.70</td>\n",
       "      <td>300237</td>\n",
       "      <td>287737.000</td>\n",
       "      <td>2.979</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>40000 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        GRJCJS      GRZHYE  GRZHSNJZYE  GRZHDNGJYE   GRYJCE   DWYJCE   DKFFE  \\\n",
       "0       1737.0    3223.515     801.310     837.000   312.00   312.00  175237   \n",
       "1       4894.0   18055.195   53213.220    1065.200   795.84   795.84  300237   \n",
       "2      10297.0   27426.600   13963.140    7230.020  1444.20  1444.20  150237   \n",
       "3      10071.5  111871.130   99701.265    2271.295  1417.14  1417.14  350237   \n",
       "4       2007.0     237.000   11028.875      35.780   325.50   325.50  150237   \n",
       "...        ...         ...         ...         ...      ...      ...     ...   \n",
       "39995   3185.0   82865.575   65304.370    4026.600   590.76   590.76  150237   \n",
       "39996   1660.5   32811.585   27065.150    1603.560   407.82   407.82  300237   \n",
       "39997   3923.0    8875.695    1309.890    3185.800   605.60   605.60  175237   \n",
       "39998   3527.0    2079.400     252.875    1614.490   467.30   467.30  125237   \n",
       "39999   4934.5   26210.635   27266.080   -4994.610   800.70   800.70  300237   \n",
       "\n",
       "             DKYE   DKLL  \n",
       "0      154112.935  2.708  \n",
       "1      298252.945  2.979  \n",
       "2      147339.130  2.708  \n",
       "3      300653.780  2.708  \n",
       "4      145185.010  2.708  \n",
       "...           ...    ...  \n",
       "39995  117816.680  2.708  \n",
       "39996  267615.065  2.979  \n",
       "39997     237.000  2.708  \n",
       "39998  113068.900  2.708  \n",
       "39999  287737.000  2.979  \n",
       "\n",
       "[40000 rows x 9 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "num_cols = ['GRJCJS', 'GRZHYE', 'GRZHSNJZYE', 'GRZHDNGJYE', 'GRYJCE', 'DWYJCE','DKFFE', 'DKYE', 'DKLL']\n",
    "train[num_cols]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 特征工程 (Feature engineering)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.concat([train, test], axis = 0).reset_index(drop = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>XINGBIE</th>\n",
       "      <th>CSNY</th>\n",
       "      <th>HYZK</th>\n",
       "      <th>ZHIYE</th>\n",
       "      <th>ZHICHEN</th>\n",
       "      <th>ZHIWU</th>\n",
       "      <th>XUELI</th>\n",
       "      <th>DWJJLX</th>\n",
       "      <th>DWSSHY</th>\n",
       "      <th>...</th>\n",
       "      <th>GRYJCE_DWYJCE</th>\n",
       "      <th>GRZHDNGJYE_GRZHSNJZYE</th>\n",
       "      <th>DKFFE_multi_DKLL_ratio</th>\n",
       "      <th>DKYE_multi_DKLL_ratio</th>\n",
       "      <th>DKYE_DKFFE_ratio</th>\n",
       "      <th>DKFFE_DKYE_ratio</th>\n",
       "      <th>GRZHYE_diff_GRZHDNGJYE</th>\n",
       "      <th>GRZHYE_diff_GRZHSNJZYE</th>\n",
       "      <th>GRYJCE_DWYJCE_ratio</th>\n",
       "      <th>DWYJCE_GRYJCE_ratio</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>train_0</td>\n",
       "      <td>1</td>\n",
       "      <td>1038672000</td>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>0</td>\n",
       "      <td>99</td>\n",
       "      <td>150</td>\n",
       "      <td>12</td>\n",
       "      <td>...</td>\n",
       "      <td>624.00</td>\n",
       "      <td>1638.310</td>\n",
       "      <td>0.532069</td>\n",
       "      <td>0.467931</td>\n",
       "      <td>0.467931</td>\n",
       "      <td>0.532069</td>\n",
       "      <td>2386.515</td>\n",
       "      <td>2422.205</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>train_1</td>\n",
       "      <td>2</td>\n",
       "      <td>504892800</td>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>0</td>\n",
       "      <td>99</td>\n",
       "      <td>110</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1591.68</td>\n",
       "      <td>54278.420</td>\n",
       "      <td>0.501658</td>\n",
       "      <td>0.498342</td>\n",
       "      <td>0.498342</td>\n",
       "      <td>0.501658</td>\n",
       "      <td>16989.995</td>\n",
       "      <td>-35158.025</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>train_2</td>\n",
       "      <td>1</td>\n",
       "      <td>736185600</td>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>0</td>\n",
       "      <td>99</td>\n",
       "      <td>150</td>\n",
       "      <td>9</td>\n",
       "      <td>...</td>\n",
       "      <td>2888.40</td>\n",
       "      <td>21193.160</td>\n",
       "      <td>0.504869</td>\n",
       "      <td>0.495131</td>\n",
       "      <td>0.495131</td>\n",
       "      <td>0.504869</td>\n",
       "      <td>20196.580</td>\n",
       "      <td>13463.460</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>train_3</td>\n",
       "      <td>1</td>\n",
       "      <td>428515200</td>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>0</td>\n",
       "      <td>99</td>\n",
       "      <td>150</td>\n",
       "      <td>7</td>\n",
       "      <td>...</td>\n",
       "      <td>2834.28</td>\n",
       "      <td>101972.560</td>\n",
       "      <td>0.538089</td>\n",
       "      <td>0.461911</td>\n",
       "      <td>0.461911</td>\n",
       "      <td>0.538089</td>\n",
       "      <td>109599.835</td>\n",
       "      <td>12169.865</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>train_4</td>\n",
       "      <td>2</td>\n",
       "      <td>544204800</td>\n",
       "      <td>90</td>\n",
       "      <td>90</td>\n",
       "      <td>999</td>\n",
       "      <td>0</td>\n",
       "      <td>99</td>\n",
       "      <td>900</td>\n",
       "      <td>14</td>\n",
       "      <td>...</td>\n",
       "      <td>651.00</td>\n",
       "      <td>11064.655</td>\n",
       "      <td>0.508550</td>\n",
       "      <td>0.491450</td>\n",
       "      <td>0.491450</td>\n",
       "      <td>0.508550</td>\n",
       "      <td>201.220</td>\n",
       "      <td>-10791.875</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 36 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        id  XINGBIE        CSNY  HYZK  ZHIYE  ZHICHEN  ZHIWU  XUELI  DWJJLX  \\\n",
       "0  train_0        1  1038672000    90     90      999      0     99     150   \n",
       "1  train_1        2   504892800    90     90      999      0     99     110   \n",
       "2  train_2        1   736185600    90     90      999      0     99     150   \n",
       "3  train_3        1   428515200    90     90      999      0     99     150   \n",
       "4  train_4        2   544204800    90     90      999      0     99     900   \n",
       "\n",
       "   DWSSHY  ...  GRYJCE_DWYJCE  GRZHDNGJYE_GRZHSNJZYE  DKFFE_multi_DKLL_ratio  \\\n",
       "0      12  ...         624.00               1638.310                0.532069   \n",
       "1       0  ...        1591.68              54278.420                0.501658   \n",
       "2       9  ...        2888.40              21193.160                0.504869   \n",
       "3       7  ...        2834.28             101972.560                0.538089   \n",
       "4      14  ...         651.00              11064.655                0.508550   \n",
       "\n",
       "   DKYE_multi_DKLL_ratio  DKYE_DKFFE_ratio  DKFFE_DKYE_ratio  \\\n",
       "0               0.467931          0.467931          0.532069   \n",
       "1               0.498342          0.498342          0.501658   \n",
       "2               0.495131          0.495131          0.504869   \n",
       "3               0.461911          0.461911          0.538089   \n",
       "4               0.491450          0.491450          0.508550   \n",
       "\n",
       "   GRZHYE_diff_GRZHDNGJYE  GRZHYE_diff_GRZHSNJZYE  GRYJCE_DWYJCE_ratio  \\\n",
       "0                2386.515                2422.205                  0.5   \n",
       "1               16989.995              -35158.025                  0.5   \n",
       "2               20196.580               13463.460                  0.5   \n",
       "3              109599.835               12169.865                  0.5   \n",
       "4                 201.220              -10791.875                  0.5   \n",
       "\n",
       "   DWYJCE_GRYJCE_ratio  \n",
       "0                  0.5  \n",
       "1                  0.5  \n",
       "2                  0.5  \n",
       "3                  0.5  \n",
       "4                  0.5  \n",
       "\n",
       "[5 rows x 36 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['missing_rate'] = (df.shape[1] - df.count(axis = 1)) / df.shape[1]\n",
    "\n",
    "df['DKFFE_DKYE'] = df['DKFFE'] + df['DKYE']\n",
    "df['DKFFE_DKY_multi_DKLL'] = (df['DKFFE'] + df['DKYE']) * df['DKLL']\n",
    "df['DKFFE_multi_DKLL'] = df['DKFFE'] * df['DKLL']\n",
    "df['DKYE_multi_DKLL'] = df['DKYE'] * df['DKLL']\n",
    "df['GRYJCE_DWYJCE'] = df['GRYJCE'] + df['DWYJCE']\n",
    "df['GRZHDNGJYE_GRZHSNJZYE'] = df['GRZHDNGJYE'] + df['GRZHSNJZYE']\n",
    "\n",
    "df['DKFFE_multi_DKLL_ratio'] = df['DKFFE'] * df['DKLL'] / df['DKFFE_DKY_multi_DKLL']\n",
    "df['DKYE_multi_DKLL_ratio'] = df['DKYE'] * df['DKLL'] / df['DKFFE_DKY_multi_DKLL']\n",
    "df['DKYE_DKFFE_ratio'] = df['DKYE'] / df['DKFFE_DKYE']\n",
    "df['DKFFE_DKYE_ratio'] = df['DKFFE'] / df['DKFFE_DKYE']\n",
    "df['GRZHYE_diff_GRZHDNGJYE'] = df['GRZHYE'] - df['GRZHDNGJYE']\n",
    "df['GRZHYE_diff_GRZHSNJZYE'] = df['GRZHYE'] - df['GRZHSNJZYE']\n",
    "df['GRYJCE_DWYJCE_ratio'] = df['GRYJCE'] / df['GRYJCE_DWYJCE']\n",
    "df['DWYJCE_GRYJCE_ratio'] = df['DWYJCE'] / df['GRYJCE_DWYJCE']\n",
    "\n",
    "\n",
    "gen_feats = ['DKFFE_DKYE', 'DKFFE_DKY_multi_DKLL', 'DKFFE_multi_DKLL', 'DKYE_multi_DKLL', 'GRYJCE_DWYJCE', \n",
    "             'GRZHDNGJYE_GRZHSNJZYE', 'DKFFE_multi_DKLL_ratio', 'DKYE_multi_DKLL_ratio', 'GRZHYE_diff_GRZHDNGJYE',\n",
    "            'GRZHYE_diff_GRZHSNJZYE', 'GRYJCE_DWYJCE_ratio', 'DWYJCE_GRYJCE_ratio', 'DKYE_DKFFE_ratio', 'DKFFE_DKYE_ratio']\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/sunzhongyu/opt/anaconda3/envs/python36/lib/python3.6/site-packages/seaborn/distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
      "  warnings.warn(msg, FutureWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='age', ylabel='Density'>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEGCAYAAAB/+QKOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAtZ0lEQVR4nO3deXicZ5nn++9dpX3frc2yvMWOl8R2HDvOAlkgHba4gdBkoWE4dKf7QLp7pmemh+E6h2E4PTMwZ04znEOYmUBoEkKapFkNhATIvjiOHTuOd1u2JVm29n1fqp7zR5USIZetsqWqt1T6fa5Ll6veeqveW2VJdz3b/ZhzDhERkel8XgcgIiKJSQlCREQiUoIQEZGIlCBERCQiJQgREYkoxesA5kpJSYmrra31OgwRkXnlzTff7HDOlUZ6LGkSRG1tLXv27PE6DBGRecXMGi70mLqYREQkIiUIERGJSAlCREQiUoIQEZGIlCBERCQiJQgREYlICUJERCJSghARkYiUIEREJKKkWUktIjIfPL6r8bxj926t8SCSmakFISIiESlBiIhIREoQIiISkRKEiIhEpAQhIiIRaRaTyDw2n2bEyPyjFoSIiESkBCEiIhEpQYiISERKECIiEpEShIiIRKQEISIiESlBiIhIREoQIiISkRKEiIhEpAQhIiIRqdSGSIJR+QxJFDFNEGZ2B/BNwA981zn3tWmPpwOPAtcAncAnnXP1ZpYKfBfYFI7xUefcf4llrCKJLFLSEIm1mHUxmZkfeBD4ALAGuMfM1kw77XNAt3NuBfAN4Ovh458A0p1z6wklj78ws9pYxSoiIueL5RjEFqDOOXfKOTcG/AjYPu2c7cAj4ds/Bm4zMwMckG1mKUAmMAb0xTBWERGZJpYJogo4M+V+U/hYxHOccxNAL1BMKFkMAs1AI/DfnHNd0y9gZveb2R4z29Pe3j7334GIyAKWqLOYtgABoBJYCvxrM1s2/STn3EPOuc3Ouc2lpaXxjlFEJKnFMkGcBRZPuV8dPhbxnHB3Uj6hwep7gaedc+POuTbgVWBzDGMVEZFpYpkgdgMrzWypmaUBdwM7pp2zA/hM+PZdwHPOOUeoW+lWADPLBq4DjsYwVhERmSZmCSI8pvAA8AxwBHjSOXfIzL5qZneGT3sYKDazOuBvgS+Gjz8I5JjZIUKJ5h+dc2/HKlYRETlfTNdBOOeeAp6aduzLU26PEJrSOv15A5GOi4hI/CTqILWIiHhMCUJERCJSLSYRj6h8hiQ6tSBERCQiJQgREYlICUJERCJSghARkYiUIEREJCIlCBERiUgJQkREIlKCEBGRiJQgREQkIiUIERGJSAlCREQiUi0mkSQTqcbTvVtrPIhE5ju1IEREJCIlCBERiUgJQkREItIYhMg8cqZriLfO9HCqY4DB0QCFWalcVV3AdcuK8fvM6/AkyShBiMwDZ7qGePpQC6c7BknxGctKs1lcmMW53mF+faCZurYB7tlSQ1qKOgVk7ihBiCSw8UCQpw4088bpLnLSU/jg+go2LykkI9X/zjlvnO7iF2+d5Z/eaOTT25ZgppaEzA0lCJEE1TM0xmOvN3Cud4QblhfzvisXkT4lMUzasrSIsXAiOXC2l6uqC+IfrCQltUdFElDfyDjffeU0nYNjfPq6JXzoqsqIyWHS9cuLqSrI5FdvNzM8FohjpJLMlCBEEszYRJDvv1rPwMgEn71hKasr8mZ8js+M7RsqGRidYHd9VxyilIVACUIkwfzucAstfSPcu7WGmqKsqJ9XXZjF0pJsdp7qJBB0MYxQFgolCJEEcrpjkFdPdnLdsmKuWJR7yc+/YXkxvcPjHGnui0F0stAoQYgkCOccTx1opiArlTvWll/Wa6yuyKMwK5XXT3XOcXSyEClBiCSI4639nO0Z5tZVZZe9nsFnxqaaQk53DNI3PD7HEcpCowQhkgCcczx7tI3CrFQ21hTO6rXWV+fjgIPn
eucmOFmwlCBEEsCZriGauod5zxWlsy6ZUZabQXleBm83KUHI7ChBiCSANxu7SfUbG+ZokdtV1fk0dg3RMzQ2J68nC5MShIjHxiaCvN3Uy7rK/IsuhrsU66ryATis2UwyC0oQIh47dK6X0Ykg1yyZ3djDVCU56ZTkpHGspX/OXlMWHiUIEY+93dRLQVYqtSXZc/q6qxblcrpjkLGJ4Jy+riwcShAiHhqbCHKyfYA1FXn45rgK66ryPCaCjpPtA3P6urJwKEGIeKiubYCJoGN1+cz1li5VbUkWaSk+jqqbSS6TEoSIh4629JGR6mPpHHcvAaT4fKwozeF4az/OqTaTXLqYJggzu8PMjplZnZl9McLj6Wb2RPjxXWZWO+Wxq8xsp5kdMrMDZpYRy1hF4i3oHEdb+llZlhuz7UJXLsqhd3icUx2DMXl9SW4xSxBm5gceBD4ArAHuMbM10077HNDtnFsBfAP4evi5KcBjwF8659YCNwOqGyBJpblnhIHRCVaXX3pRvmitLAu99svH22N2DUlesWxBbAHqnHOnnHNjwI+A7dPO2Q48Er79Y+A2C+2XeDvwtnNuP4BzrtM5p11QJKlMDh4vL8uJ2TWKstMoyk7jlbqOmF1DklcsE0QVcGbK/abwsYjnOOcmgF6gGLgCcGb2jJntNbO/i3QBM7vfzPaY2Z72dn1CkvnlVMcApbnp5GWkxvQ6K0pz2Hmyk/GAprvKpUnUQeoU4EbgvvC/HzWz26af5Jx7yDm32Tm3ubS0NN4xily28UCQ+o4hlsVgcHq6FWU5DI4F2NfYE/NrSXKJZYI4Cyyecr86fCziOeFxh3ygk1Br4yXnXIdzbgh4CtgUw1hF4urtph7GAkGWl8aue2nS8tIcfAavnFArWy5NLBPEbmClmS01szTgbmDHtHN2AJ8J374LeM6F5uM9A6w3s6xw4ngvcDiGsYrE1Wt1oQ194tGCyEzzc/XiAl46oXEIuTQxSxDhMYUHCP2xPwI86Zw7ZGZfNbM7w6c9DBSbWR3wt8AXw8/tBv6BUJJ5C9jrnPt1rGIVibddp7soz8sgKz0lLte7aUUJbzf10DukyYASvZj+dDrnniLUPTT12Jen3B4BPnGB5z5GaKqrSFIJBB37GrvfqbgaDzeuLOX/fa6Onac6uGNdRdyuK/Nbog5SiySt4639DI4FqCnKits1N9YUkJ3mVzeTXBIlCJE4e7OhG4AlxbEff5iU6vexbXkxryhByCWITweoiLxjb0M3JTlpFGbFdv3DdDetLOX3R9po6Bx8Jzk9vqvxvPPu3VoT17gkcakFIRJnexu72VRTiM1xee+Z3LiyBICX1YqQKClBiMRRx8Ao9Z1Dc7p7XLSWlWRTmZ+hbiaJmhKESBztDY8/eJEgzIybVpby6skOJlR2Q6KgBCESR3sbe0j1W1ynuE5148oS+kcmePtsryfXl/lFCUIkjvY2dLO2Mp+MVL8n179hRQlm8MIxld2QmSlBiMTJ2ESQ/U09nnQvTSrKTmNTTSHPH23zLAaZP6Ka5mpmPyVUFuM3zjl1XopchiPNfYxOBD1NEAC3ri7j/37mGK19I7N+rUjTZEFTZZNFtC2IbwP3AifM7GtmtiqGMYkkncd3NfLQS6cAaOgcuuAf1ni47coyALUiZEZRtSCcc78Hfm9m+cA94dtngO8AjznnVAFMZAaNXUMUZKaSnxnfBXLTrVqUS1VBJs8ebeOWVWXnPa5WgUyKegzCzIqBfwH8GbAP+CahPRp+F5PIRJJMY9cQNcXxq790IWbGbVeW8fKJdsYm1GMsFxZVgjCznwEvA1nAR5xzdzrnnnDO/RUQ+x1PROa5nqExeofHWRLHAn0Xc8e6ckbGgxxr7fc6FElg0dZi+k64dPc7zCzdOTfqnNscg7hEkkpD5xAQ3wJ9F7N1aTElOWkcONvLeo/WZEjii7aL6e8jHNs5l4GIJLOGrkHSUnwsysvwOhQA/D7jA+sqONbSp24muaCL
JggzKzeza4BMM9toZpvCXzcT6m4SkSg0dA5RU5iF3xffAn0X86GrKhgPOI629HkdiiSombqY/ojQwHQ1oS1AJ/UDX4pRTCJJZWB0gpbeEW5Zff6MIS9dW1tEfmYqexu7uaq6wOtwJAFdNEE45x4BHjGzjzvnfhKnmESSyr7GbhwkzAD1JL/P2FRTyAvH2ugZGqMgK83rkCTBzNTF9KnwzVoz+9vpX3GIT2Te21PfjQGLEyxBAGwOr+qe3OVOZKqZBqknp1zkALkRvkRkBm82dFOen+FZgb6LKcxOY0VZDnsaugkEndfhSIKZqYvpf4X//Y/xCUckuUwEguxr7PasvHc0ti4t5rFdDRw818vVFxmL8LI8iHgj2oVy/9XM8sws1cyeNbP2Kd1PInIBR1v6GRwLJMz6h0hWV+RSmpPOS8fbcU6tCHlXtOsgbnfO9QEfBuqBFcC/jVVQIslism9/SQKU2LgQnxk3rSyhuXeEE20DXocjCSTaBDHZFfUh4J+dc9qOSiQKexq6Kc/LoMDjAn0z2bC4gPzMVH53uJWgWhESFm2C+JWZHQWuAZ41s1Jg9sXkRZKYc443TneyubYQs8RZIBdJit/H7WsWcbZnmP1nerwORxJEVAnCOfdF4Hpgc7i09yCwPZaBicx3J9sHae0b5YYVJV6HEpWrFxdQVZDJbw+3MjoR8DocSQDRFusDWE1oPcTU5zw6x/GIzBuRZvVM3TNh58kOAK5fXsyrdZ1xi+ty+cz48FUVPPTSKZ4+2ML2DVVehyQei3YW0w+A/wbcCFwb/lIVV5GLeLWuk6qCTGoScIHchSwpzuaGFSXsOt3FCZUCX/CibUFsBtY4zYETiUow6Nh5qpPb1yxK+PGH6d6/ZhHHW/t5Ys8ZPn/zCoqyVYJjoYo2QRwEyoHmGMYikjQON/fROzw+b8Yfpkr1+/jUdUv4Hy+c5NGd9dx/0zKy0i+lN3rm7jeZH6KdxVQCHDazZ8xsx+RXLAMTmc9ePN4OwPUrij2O5PKU5KRz79YaugbH+M4rp+gf0bbzC1G0Hwu+EssgRJLNC8faWFeVR1luYmwQdDmWl+bw6W21PPZ6A996ro6PbqpidXme12FJHEU7zfVFQiuoU8O3dwN7YxiXyLzVOzTO3sYeblmVWPs/XI4VZTn8xXuXkZ2ewqM7G/j2C3W8UtfB6Y5BeobGVOAvyUXVgjCzPwfuB4qA5UAV8D+B22IXmsj89HJdO4Gg4+ZVpV6HMicq8jP5/M3L2V3fxeunu3jqwLtDkQbkZqSwqjyXa2uLqC6cPzO2ZGbRdjF9AdgC7AJwzp0ws/n/8UgkBp4/2k5BViobFhd6HcqcSfH72La8hG3LS+gbHqelb4TeoXF6R8Zp7x9l/5le9tR3c+vqMm5ZXYZvns3cksiiTRCjzrmxyel64cVyaluKTBMIOl441sZ7VpYm1P7TcykvM5W8abWlRsYD7Nh/jmePtjE4NsGdV2uRXTKINkG8aGZfAjLN7P3A54Ffxi4skflpT30XnYNj3L52kdehxFVGqp8/2byYnPQUXqnroDQ3g23L5ucMLnlXtNNcvwi0AweAvwCeAv6PmZ5kZneY2TEzqzOzL0Z4PN3Mngg/vsvMaqc9XmNmA2b2b6KMU8RTzxxqJS3Fx81JMEB9Oe5YV87q8lyeOtBMR/+o1+HILEU7iykI/Bz4vHPuLufcd2ZaVW1mfuBB4APAGuAeM1sz7bTPAd3OuRXAN4CvT3v8H4DfRBOjiNecczxzqIUbV5SQc4kLy5KFz4yPbqwi1W/8Yv9ZbUA0gxePtfHDXQ0JWxzxognCQr5iZh3AMeBYeDe5L0fx2luAOufcKefcGPAjzq8Aux14JHz7x8BtFh7oMLM/Bk4Dh6L+bkQ81Nw7wtmeYe5YW+51KJ7KzUjl9jXlnGwf5OC5Pq/DSVgTgSAvnmjn0Lk+/s+fH0zIZDpTC+JfATcA1zrnipxzRcBW4AYz+1czPLcKODPlflP4
WMRznHMTQC9QbGY5wL8DLroXtpndb2Z7zGxPe3v7DOGIxNaBs734fcZtVy7M7qWptiwtoiw3nd8f0QZEF3K8tZ+R8SDLS7N5ck8Tzx9r8zqk88yUIP4UuMc5d3rygHPuFPAp4NMxjOsrwDeccxfd/9A595BzbrNzbnNpaXLMOZf5yTnH/qYeblxRQnFOutfheM5nxq2ry2jvH+VAkzagjOStpl6y0/z86XW1pPqNPfXdXod0npkSRKpzrmP6QedcOzDTHopngcVT7leHj0U8Jzx1Nh/oJNRK+a9mVg/8S+BLZvbADNcT8Uxj1xA9Q+Ns31DpdSgJY11VPmW56Tx3tE2tiGlGxgMcbe5jfXUBaSk+VpTlcrg58brjZkoQY5f5GITKcaw0s6VmlgbcDUwv8LcD+Ez49l3Acy7kJudcrXOuFvjvwH92zn1rhuuJeGZ/Uw8pPuP2BT7+MJXPjFtWldE+MMqxFu0tMdWZriEmgo41FaHaVmsq8jicgOM1MyWIq82sL8JXP7D+Yk8Mjyk8ADwDHAGedM4dMrOvmtmd4dMeJjTmUAf8LaHptCLzSiDoONDUy+qKvAU7e+lC1lXlU5CZyssnNEY4VWvfCADl+aFijldW5NLWP0p7gk0NvuhPs3POP5sXd849RWjNxNRjX55yewT4xAyv8ZXZxCASayfbBxgcC7ChOt/rUBKO32fcsKKEXx9oZl9jNxtrkqf8yGy09o2Sk57yzgeKNZWhlsSR5j5KcxNnPDXahXIicgH7z/SQkerjikW5XoeSkDYvKSQj1cd3Xj7ldSgJo7V/hLK8dyczTHY1Jdo4hBKEyCyMB4Icbu5jbWU+KX79OkWSnupn69Jinj7YQkPnoNfheC7oHG19oyzKe3evkIKsNKoKMhNuHEI/0SKzcLSln9GJIFdXF3gdSkLbtqwYv8/43iunZz45yfUMjTMWCFI+bTOpKyvyOKIWhEjy2NvQTV5GCstKs70OJaHlZaayfUMVT+5pontwpgmQyW1ygHpR3h+ul1lWmk1D11BCrajWlAuRGTy+qzHi8d7hcY639vPeVaXa/yAKf37TMn78ZhM/3NXAA7eu9Docz0wmiLK8P2xBVOZnMDYRpHNwjJIEWWypBCFymfY2duOAazQzJyqrynO5eVUp33+tgT+7aRkZqaFJkpES8L1ba+IdXty09o1QkJn6zvc/qbIgE4BzPcMJkyDUxSRyGYLO8WZDN0tLslVa4xLcf9MyOgZG+fm+6UUVFo6OgTFKc8//mZmaIBKFEoTIZTjdMUjX4Bibl6j1cCm2LS9mbWUe33n5FMFg4vS1x4tzjs7BUYqy0857rOqdBDES77AuSAlC5DK82dBNRqqPdVVaHHcpzIz737OMk+2DCVm9NNZ6hsYZGQ9SHCFBFGSlkpHqUwtCZD4bHgtw8GwvV1cXkKq1D5fsg+srqCrI5KGXFt7CuYauIQCKss/vYjIzKgsyOderBCEyb+1v6mEi6LhG3UuXJdXv47M31LLrdBf7z/R4HU5cTS4ULMo5vwUBoW6mswnUxaRZTCKXwDnHzlOdVBZkvNNnLJfu7i01fPPZEzz08iluWF5y3uMXmlo832c3NXaGWxBZkRNEZX4mR1sSp+tNLQiRS1DXPkB7/yjXLy/BtPbhsuWkp3Df1iX85kAzXQto4VxD1xB5GSmkpUT+01tZkEl7/2jC7FGtBCFyCV6r6yQ7PYWrNDg9a//i+lr8PuOl4wunFHhD52DEGUyTKgtCi+daehOjm0kJQiRKHQOjHGvtZ+vSIhXmmwPl+Rnct3UJu+u7aOlLjD+IsdbQORRxgHrSZLfl2QSZyaSfcpEo7TzVid+MLUuLvA4lafzNbSvJSPXz67fPJVQNolgYHgvQ1h95DcSkygRbC6FBapEpLjQ4OjIeYG9DN+ur88nLmGk7dolWYXYa77uyjF++3czbTb1cvbjA65BipjE8xTXSGohJkzvMtSZIi0otCJEovNnQzehEkOuX
F3sdStLZuqyYxYWZ/PLtc/SPjHsdTsxMTnEtvsAUV4CMVD+5GSm0KUGIzA8TwSCv1HWwpCiL6sIsr8NJOj4zPrapmtGJID/bd5ZgknY1NUxOcb1ICwKgLDedtgTZm1oJQmQG+xp76B0e55bVZV6HkrQW5WXwgXXlHG3pT9pZTQ1dg+RlpJCVdvGe/bLcjIRJEBqDELmIQNDx4vF2qgoyWVmW43U4SW3bsmIau4b43eHW0PsdYY/v+VwavKFziCXFM28stSgvnTcbu+MQ0czUghC5iLebeugaHOOWVWVaGBdjZsbHNlZTlpfOE3vO0D2UXAvoGruGqCmeuYuyLC+D1r7RhJjVpQQhcgFB53jhWDvleRmsrjj/06zMvbQUH/dtXUIg6Pjh6w0Js6J4tsYDQc52D1MbTYLITWdsIkjf8EQcIrs4JQiRCzh0ro/2gVFu1paicVWSk87d1y6muXeEJ3efSYpB63M9w0wEHUuKZu5imtxMqK3f+5lMShAiEQSd4/mjbZTkpGvPBw+sKs/jw1dVcKSln6cPtngdzqxNzmCKqospN7QWIhEGqpUgRCI4fK6Plr4RblHrwTPblpewbVkxr9R1sOt0p9fhzMrkPhBLohqDSJwWhGYxiUwTdI7nj7VRkpPGVdUFXoezoH1wfQVdg2P8cv85irLSIs5smg8aOwdJS/GxKNw6uJiyyS6mPrUgRBLOkeY+mntHuGVVGX6fWg9e8vuMu69dTFluBo+/0ZgwK4wvVUPnEEuKsvBF8fOUk55CZqpfXUwiicY5x3NH2yjOVushUaSn+vn0tiWk+H08tquRkfH5N7PpdMdgVGsgIDTdd1FeYqymVoIQmeJIc79aDwmoICuNe65dTNfgKD/Z25QQawSiNR4IUt85yMpF0S+0LMvNSIiCfUoQImGh1kMrRdlpSV1VdL5aVprDH60t59C5Pl4+0eF1OFFr6BxiPOAuaSV+aV467WpBiCSOZ4+0cU6th4R244oS1lXl88yhFuraBrwOJyp1bf0ArCyLfoC9LDc9IcZbNItJFqTpNX2cc3z7hZMUZaexQa2HhGVmfHxjFa19Izyxu5EHbl3pdUgzOtEaSmTLy6Ibg4BQF9PgWIDB0Qmy0737M60EIQKcaBvgbM8wH9tYpdZDgktP9XPflhq+/eJJHt/VQHa6nxTfH3aGJFIBvxNtA1QXZs5YxXWqd6a69o+y1MMEoS4mEeCVEx3kZaSwoabA61AkCmV5GXx8UzVnuof5zYHEXml9om2AFZdYCfidxXIedzMpQciCd65nmLr2AbYtLznvk6gkrvVV+dywvJidpzrZf6bH63AiCgQdJ9sHLrlUfKKU29Bvgyx4r9Z1kOb3saW2yOtQ5BLdsa6CJcVZ/HRfU0JMC52uqXuIsYngJQ1QQ2hPCPB+b+qYdm6Z2R3ANwE/8F3n3NemPZ4OPApcA3QCn3TO1ZvZ+4GvAWnAGPBvnXPPxTJWWZh6h8fZ39TDdcuKyUzzex2OXCK/z7hnSw3feq6Ox15v4C/fu5zs9JSIGwtB5LGJSzn3Uh1rCc1gWnEJayAA8jNTSUvxeT7VNWYtCDPzAw8CHwDWAPeY2Zppp30O6HbOrQC+AXw9fLwD+Ihzbj3wGeAHsYpTFradJztwDq5fXuJ1KHKZ8jJSuW9rDb3D4zy6s56xiaDXIb1j35keUnzGmoq8S3qemVGa4/1q6lh2MW0B6pxzp5xzY8CPgO3TztkOPBK+/WPgNjMz59w+59y58PFDQGa4tSEyZ0bHA7xR38XaqvwZN5KXxLakOJu7r11MU/cw33+tPmHKcext6GZtZR4ZqZfeOi3LS/e8omssE0QVcGbK/abwsYjnOOcmgF6geNo5Hwf2OufOS6Vmdr+Z7TGzPe3tybnRucTOvjM9jIwHuXGFWg/JYE1lPn9y7WIauwZ5+JXT9A6PexrPRCDI2029bKwpvKznhxbLJW8LYtbMbC2hbqe/iPS4c+4h59xm59zm0tLS
+AYn85pzjt31XVTkZ7C4MNPrcGSOXF1dwKe2LqF9YJRvPV/H8dZ+z2I52tLP8HiATUsuN0FkJHUX01lg8ZT71eFjEc8xsxQgn9BgNWZWDfwM+LRz7mQM45QF6GzPMM29I1xbW4RpQ6Cksroij8+/dzlZaX6+/1o9//RGIx0D8f9Du7exG4BNl7m2piw3nd7hcU+7y2I5i2k3sNLMlhJKBHcD9047ZwehQeidwF3Ac845Z2YFwK+BLzrnXo1hjLJAvXG6i1S/qaxGkirLy+CvblnBi8fbeelEOwfP9rKmMo+Kggzes7I0Lqvl9zZ0U5abTlXB5bVQF+WF1kK094+yuGjmnehiIWYJwjk3YWYPAM8Qmub6PefcITP7KrDHObcDeBj4gZnVAV2EkgjAA8AK4Mtm9uXwsdudc22xilcWjv6R0NTWq6sLLmvwUOaHFL+P265cxJalRbxS18GbDd189h93U5GfwV3XVHPXNdUX3aMh0vTXaKe+BoOOXae7uGZJ4WW3UEunbD2adAkCwDn3FPDUtGNfnnJ7BPhEhOf9PfD3sYxNFq5fvHWO8YDjWi2MWxByM1L5wLoK3r9mESU56Tyx+wwPPl/H//dcHVuWFrG0OJt1Vflz2qrYd6ab5t4R/t0d5Zf9Gomw9aiK9cmC4pzj8V2NVORnUK3B6QUlxefjg+sr+OD6Clp6R/jJ3iZ+/GYTb5zu4ndHWnnPylI21RSQ4p/90Owv9zeTnuLjfWsWXfZrJEK5DSUIWVAOnO3lcHMfd15dqcHpBaw8P4Mv3LKC//29y/nyLw7xwvE2fv7WWZ4/1sb7rixjY00hvgg/H9Gsug4EHb96u5nbriwjZxaVWIuz00jxGS0elttQgpAF5fFdjWSm+jU4LQD4fMaayjyurMjlZPsgvz3cwk/2nuXlEx380dpyVpfnRvVBYmriON7aT8fAKB+5qnLWsS3Ky6C5Z3hWrzMbShCyYPSPjLNj/zk+cnWFBqflD5gZK8pyWF66nEPn+vjt4RZ+8HoDS4qyuGNd+UUHs6cKBB1PH2whPzOVW1aXzTquqoJMzvV614JI6IVyInNpx/5zDI0FuGdL4mwmI4nFzFhXlc/f3HYF2zdU0jU4xv966RSPvFbP8dZ+gs5d9Pmvn+qkpW+ED181Nx9CKgoyaO5VC0Ik5v7pjUZWl+eyYXEBR5q9W2Eric/vM7YuLWbj4kJeO9nBqyc7+f5r9RRnp3FtbRFrK/Moznm3PJxzjtdPdfKbg82sWpTLmoq8C45XXIqK/ExaepsJBh0+D3Y6VIKQBeFAUy8Hz/bxf21fq8FpiVpaio+bV5Vx44oSDp7r4/VTnTx9qIWnD7WQl5FCWW4GKX6juXeE3uFxVpfn8olrFs/Zz1hlQQbjAUfHwChl4YVz8aQEIQvC4280kJHqY/vG6fUiRWaW4vexYXEBGxYX0DU4xrGWPs50D9M5MMrAqGNxURbvX5TLhpqCiLOfLldFfmgq9rneESUIkVj4x1dO85M3z7K+Kp9f7W/2OhyZ54qy09i2vIRtcbhWZUEoKTT3DHsy806D1JL09jf1MhYIcu1SrZyW+aVySgvCC0oQkvR213dRnqey3jL/FGSlkpHq82wthBKEJLUDTb2c7Rnm2qUq6y3zj5lRmZ/JOY+muipBSFJ7/I3GUFnv6gKvQxG5LBUFGZzrUReTyJzqGxlnx1tnWV9VQGaaVk7L/FSZn+nZYjklCElaP97TxOBYgG3Lpm9zLjJ/VBZk0tY/yuhE/HeWU4KQpBQMOh7dWc+mmgKqNDgt81htSRbOwZmuobhfWwlCktKLJ9qp7xziM9fXeh2KyKwsLckB4FT7YNyvrQQhSemR1+opzU3nA+sqvA5FZFaWhivJnu5QghCZtfqOQV441s59W2tIS9GPuMxv+VmpFGenKUGIzIVHdzaQ6reoN5gXSXTLSrM5pQQhMjt9I+P8854zfHB9xTt7+orMd0tLstWCEJmtx15voH90gj+/
aZnXoYjMmaUlObT3j9I/Mh7X6ypBSNIYHgvw8Munee8Vpayryvc6HJE5s7TEm4FqJQhJGk/sbqRzcIwv3LLC61BE5tSyUiUIkcs2NDbBt54/ydalRWxRWW9JMkuKs/AZnGwbiOt1lSAkKfzjq/V0DIzyd3es9joUkTmXnuJneWkOB872xvW6ShAy73UOjPI/XzzJ+64s45olhV6HIxITG2sKeOtMD865uF1TW47KvPT4rsZ3bv90bxODoxOsrdTAtCSvjTWFPLmniYbOIWrDg9axphaEzGtnuoZ4s6Gb65eXsMiDTd1F4mVjTQEA+850x+2aShAyb40HgvxkbxO5GSncurrM63BEYmplWS7ZaX72NfbE7ZpKEDJv/f5wK239o3x0YzUZqdoQSJKb32dcVR0ah4gXJQiZl4429/FKXQdbaotYVZ7rdTgicbGxpoDD5/oYHJ2Iy/WUIGTeqWvr54k9Z6goyOCD61XOWxaO91xRykTQ8fsjrXG5nhKEzCtnuob49MNvkOL38amtS1TOWxaULbVFlOdlsOOtc3G5nn67ZN6oaxvg7odeZ3AswGevr6UgK83rkETiyucz7txQyYvH2+keHIv99WJ+BZE58NzRVj767VcZnQjw2Oe2UlmgfaZlYbrz6komgo5fH2iO+bWUICShtfeP8rFvv8b/9v095KSn8Nkblsa93IBIIllbmce6qjwefL6OgRgPVmsltQDvrkweHgvQ1j9C1+AYgaDjPVeUUpSdRlF2GlUFmRRmx6dbp75jkB/uauCx1xsZnQhw8xWl3Lq6jBS/PtPIwmZmfHX7Oj7+P17j//ntMf7DR9bG7FoxTRBmdgfwTcAPfNc597Vpj6cDjwLXAJ3AJ51z9eHH/j3wOSAA/LVz7plYxrqQOOfoGRrnVMcAx1sHONE6wEsn2mnrG6Fv5A8/kfx039k/uJ+TnsLVi/NZWZbLqvJcrliUw4rSXPKzUmcVU//IOAfP9vHi8XZeONbG0ZZ+/D7jQ+srWFGWQ0lO+qxeXySZbKop5FNbl/D91+qpzM/kz25aipnN+XViliDMzA88CLwfaAJ2m9kO59zhKad9Duh2zq0ws7uBrwOfNLM1wN3AWqAS+L2ZXeGcC8QqXoBA0DE2EWQsEHzn3+7BMToGRukcGKNzcJSOgXfv9wyPEwgG6egPDRZlpvnJTPWTleYnOz2F91xRSklOGsXZ6RTnhD6FZ6T4SU0xUv0+UnyGmeGcYyLoCIS/JoKO0YkA/SMT9A6P0zc8Tt/IBD1DY3QOjLHrdCeDowEGxyYYGg0wNDbBWCBI0IEBPjNS/EZ6io9U/+RX6Jo9Q+N0Do4yHni34FdGqo/i7HSWl+ZQlpfBotx0inPSSfUb4wHH0NgEg6MTdA2O0do3ysDIBE/uOcPQ2Lv/HYVZqSwpzqaqMJOS7DSKc9IpyEolPcVHeoofn88YnwgyHggyNBagczD0Hjb3jnCitZ9zvSMA+AyWFGdzx9pyrl5cQH7m7BKPSLL60gevpGNglP/01BFOdQzwXz521ZxfI5YtiC1AnXPuFICZ/QjYDkxNENuBr4Rv/xj4loXS4HbgR865UeC0mdWFX2/nXAe5/0wPn3xoJ2MToT+wM0lL8VGak05JThr5WWmk+Y1gEByh7pnOwVHOdAcYHJ3gxePtM76ez4jqulNlpvrJTveTlZZCYVYq1YWZbFpSiBngIOgc4wHHeCDI0eZ+AlMSUGVBJn+8sYqSnDRqi7O5YlEu1YWZ/Gj3mYtc8fxP78FwK6S1b+SdhJmaYhw+10fnwOh5LZFI33d2Wgq5mSmU5WWwriqf8rwMakuytSpaJAqZaX6+fd8mHny+jsVFWTG5RiwTRBUw9a9OE7D1Quc45ybMrBcoDh9/fdpzq6ZfwMzuB+4P3x0ws2NzE/rFnZiblykBOubmpeAnl3Duz+bqorM3p+8BwH1z+WLxuf6cvweReP2+wEVjiMt7MEMMnrovju9BBEsu9MC8HqR2
zj0EPOR1HJfDzPY45zZ7HYeX9B7oPQC9B5C470Esp4ScBRZPuV8dPhbxHDNLAfIJDVZH81wREYmhWCaI3cBKM1tqZmmEBp13TDtnB/CZ8O27gOdcaLukHcDdZpZuZkuBlcAbMYxVRESmiVkXU3hM4QHgGULTXL/nnDtkZl8F9jjndgAPAz8ID0J3EUoihM97ktCA9gTwhVjPYPLAvOwam2N6D/QegN4DSND3wOK5v6mIiMwfWpYqIiIRKUGIiEhEShBxYGbfM7M2Mzs45ViRmf3OzE6E/y30MsZYM7PFZva8mR02s0Nm9jfh4wvmfTCzDDN7w8z2h9+D/xg+vtTMdplZnZk9EZ7UkbTMzG9m+8zsV+H7C+r7BzCzejM7YGZvmdme8LGE+11QgoiP7wN3TDv2ReBZ59xK4Nnw/WQ2Afxr59wa4DrgC+GSKgvpfRgFbnXOXQ1sAO4ws+sIlZj5hnNuBdBNqARNMvsb4MiU+wvt+590i3Nuw5T1Dwn3u6AEEQfOuZcIzdKaajvwSPj2I8AfxzOmeHPONTvn9oZv9xP6A1HFAnofXMhA+G5q+MsBtxIqNQNJ/h6YWTXwIeC74fvGAvr+Z5BwvwtKEN5Z5Jyb3PGjBVjkZTDxZGa1wEZgFwvsfQh3r7wFtAG/A04CPc65yeJVEcvKJJH/DvwdEAzfL2Zhff+THPBbM3szXDIIEvB3YV6X2kgWzjlnZgtivrGZ5RAqHfUvnXN9U0sUL4T3IbyeZ4OZFRAqi7Xa24jix8w+DLQ55940s5s9DsdrNzrnzppZGfA7Mzs69cFE+V1QC8I7rWZWARD+t83jeGLOzFIJJYcfOud+Gj684N4HAOdcD/A8sA0oCJeageQuK3MDcKeZ1QM/ItS19E0Wzvf/Dufc2fC/bYQ+KGwhAX8XlCC8M7XMyGeAX3gYS8yF+5ofBo445/5hykML5n0ws9JwywEzyyS0V8oRQonirvBpSfseOOf+vXOu2jlXS6hqwnPOuftYIN//JDPLNrPcydvA7cBBEvB3QSup48DM/gm4mVBJ31bgPwA/B54EaoAG4E+cc9MHspOGmd0IvAwc4N3+5y8RGodYEO+DmV1FaPDRT+jD2ZPOua+a2TJCn6iLgH3Ap8J7oSStcBfTv3HOfXihff/h73ey6n4K8Lhz7j+ZWTEJ9rugBCEiIhGpi0lERCJSghARkYiUIEREJCIlCBERiUgJQkREIlKCEBGRiJQgREQkIiUIkTlgZj8PF147NFl8zcw+Z2bHw3tAfMfMvhU+XmpmPzGz3eGvG7yNXiQyLZQTmQNmVuSc6wqX0NgN/BHwKrAJ6AeeA/Y75x4ws8eBbzvnXjGzGuAZ59yVngUvcgGq5ioyN/7azD4avr0Y+FPgxclSCWb2z8AV4cffB6yZUsk2z8xypuwVIZIQlCBEZilcV+h9wDbn3JCZvQAcBS7UKvAB1znnRuISoMhl0hiEyOzlA93h5LCa0Jaq2cB7zawwXMr641PO/y3wV5N3zGxDPIMViZYShMjsPQ2kmNkR4GvA64T2NPjPwBuExiLqgd7w+X8NbDazt83sMPCXcY9YJAoapBaJkclxhXAL4mfA95xzP5vpeSKJQi0Ikdj5Snj/6YPAaUJ7gIjMG2pBiIhIRGpBiIhIREoQIiISkRKEiIhEpAQhIiIRKUGIiEhE/z/RBnJQZIFYogAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def get_age(df, col='age', thresholds=(18, 25, 30, 35, 40, 45)):\n",
    "    \"\"\"Add binary 'value exceeds threshold' indicator columns derived from ``df[col]``.\n",
    "\n",
    "    For the i-th threshold (1-based) a column named ``<col>_genFeat<i>`` is written\n",
    "    into ``df`` in place; it holds 1 where ``df[col] > threshold`` and 0 otherwise.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Frame containing the column ``col``. It is modified in place.\n",
    "    col : str, default 'age'\n",
    "        Column to threshold; also used as the prefix of the generated column names.\n",
    "    thresholds : sequence of numbers, default (18, 25, 30, 35, 40, 45)\n",
    "        Cut points; one indicator column is generated per entry, in order.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple\n",
    "        ``(df, names)`` where ``names`` is the list of generated column names.\n",
    "    \"\"\"\n",
    "    feat_names = []\n",
    "    for i, threshold in enumerate(thresholds, start=1):\n",
    "        name = f'{col}_genFeat{i}'\n",
    "        # Read from `col` rather than a hard-coded 'age' so the argument is honoured.\n",
    "        df[name] = (df[col] > threshold).astype(int)\n",
    "        feat_names.append(name)\n",
    "    return df, feat_names\n",
    "\n",
    "# 1609430399 is the Unix timestamp of 2020-12-31 23:59:59 at UTC+8, and\n",
    "# 365 * 24 * 3600 is the length of one non-leap year in seconds (leap days are ignored).\n",
    "# NOTE(review): this assumes CSNY holds Unix timestamps in seconds -- confirm against the data dictionary.\n",
    "df['age'] = ((1609430399 - df['CSNY']) / (365 * 24 * 3600)).astype(int)\n",
    "df, genFeats1 = get_age(df, col = 'age')\n",
    "\n",
    "# histplot replaces the deprecated sns.distplot; kde=True with stat='density' keeps the\n",
    "# histogram-plus-density-curve style. Non-positive ages are excluded from the plot only.\n",
    "sns.histplot(df.loc[df['age'] > 0, 'age'], kde=True, stat='density');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/sunzhongyu/opt/anaconda3/envs/python36/lib/python3.6/site-packages/seaborn/distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
      "  warnings.warn(msg, FutureWarning)\n",
      "/Users/sunzhongyu/opt/anaconda3/envs/python36/lib/python3.6/site-packages/seaborn/distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
      "  warnings.warn(msg, FutureWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='DKFFE', ylabel='Density'>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfEAAAClCAYAAABMWsQoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAzJ0lEQVR4nO3dd3xUZfb48c9JDy2FEEoSSOgg0sECCq4oduyLFdtiW7e7X3fd4rpN3f25uqtrR8XGqqigIoiCitJLKKGGnoQUWkhCes7vj7lgCCSZlMnMZM779ZpXZu7ccgZycuY+97nPI6qKMcYYY/xPkLcDMMYYY0zjWBE3xhhj/JQVcWOMMcZPWRE3xhhj/JQVcWOMMcZPWRE3xhhj/JTPFXERmSYiuSKyoZn2111EPheRTSKyUUSSm2O/xpi6eSCXK0Uk1XnMbo59GuPvxNfuExeRc4FCYLqqDmqG/X0F/FVV54tIO6BKVY82db/GmLp5IJcLVbVd0yMzpvXwuTNxVf0GOFh9mYj0EpG5IrJKRBaJSH939iUiA4EQVZ3v7LvQCrgxLaM5c9kYc2o+V8Rr8SLwgKqOAH4F/NfN7foCh0XkAxFZIyL/EJFgj0VpjKlPY3MZIEJEVorIUhG50iPRGeNnQrwdQH2cJvCzgfdE5NjicOe9q4FHT7FZpqpOxPX5zgGGAXuA/wG3Aa94NmpjTE1NzGWAHqqaKSI9gQUisl5Vt3s6bmN8mc8XcVytBYdVdWjNN1T1A+CDOrbNAFJVdQeAiHwEnIkVcWO8oSm5jKpmOj93OH1dhgFWxE1A8/nmdFU9AuwUkesAxGWIm5uvAKJFpJPz+gfARg+EaYypR1NyWURiROTYWXscMAbLZWN8r4iLyDvAEqCfiGSIyJ3ATcCdIrIWSAMmubMvVa3Edd3tSxFZDwjwkmciN8ZU15y5DAwAVjrbLQQeU1Ur4ibg+dwtZsYYY4xxj8+diRtjjDHGPVbEjTHGGD/lU73T4+LiNDk52dthGOPTVq1atV9VO9W/pvdYLhvjnqbms08V8eTkZFauXOntMIzxaSKy29sx1Mdy2Rj3NDWfrTndGGOM8VNWxI0xxhg/ZUXcGGOM8VM+dU3ceNbby/bU+t6NZ3RvwUiMMY1leWyqszNxY4wxxk9ZETcmAIjINBHJFZENtbwvIvJvEUkXkXUiMrzae1NEZJvzmNJyURtj6mNF3JjA8BpwUR3vXwz0cR5TgecARCQW+CNwBjAa+KOIxHg0UmOM26yIGxMAVPUb4GAdq0wCpqvLUlyz/3UFJgLzVfWgqh4C5lP3lwFjTAuyIm6MAUgA9lZ7neEsq225McYHWBE3xjQLEZkqIitFZGVeXp63wzEmIFgRN8YAZAJJ1V4nOstqW34SVX1RVUeq6shOnXx6aHdjWg0r4sYYgNnArU4v9TOBfFXdB8wDLhSRGKdD24XOMmOMD7DBXowJACLyDjAeiBORDFw9zkMBVPV5YA5wCZAOHAVud947KCJ/BlY4u3pUVevqIGeMaUFWxLERkEzrp6o31PO+AvfX8t40YJon4jLGNI01pxtjjDF+yoq4McYY46c8WsRFJFpE3heRzSKySUTO8uTxjDHGmEDi6WviTwNzVfVaEQkD2nj4eMYYY0zA8FgRF5Eo4FzgNgBVLQPKPHU8Y4wxJtB4sjk9BcgDXhWRNSLysoi09eDxjDHGmIDiySIeAgwHnlPVYUAR8FDNlWyoRmOMMaZxPFnEM4AMVV3mvH4fV1E/gQ3VaIwxxjSOx4q4qmYDe0Wkn7PofGCjp45njDHGBBpP905/AHjL6Zm+A2coR2OMMa2fjYbpeR4t4qqaCoz05DGMMcaYQGVjpxtjjBfYWappDjbsqjHGGOOn7EzcT9m3eGOMMXYmbowxxvgpOxNvxQpKypm/MYd5adlsyy3kSHEFXaMiGJ0SS69O7bwdnmlBInIRrrkMgoGXVfWxGu//CzjPedkG
iFfVaOe9SmC9894eVb2iRYI2xtTLingrdPhoGS8v2slri3dRWFpBQnQkQ5Ki2H3gKDvyClmfmc/w7tFcOTSBkGBrjGntRCQYeBa4ANcgTCtEZLaqHh+3QVV/Xm39B4Bh1XZRrKpDWyhcY0wDWBFvRY6WVfBd+n7+NmcThaUVXDq4K3eMSWZ49xhEhLeX7aG8soqvtuSxcEsuBSUV3HJWD0KCrJC3cqOBdFXdASAiM4BJ1D740g3AH1soNmNME1gRbwXKK6tYtG0/i7blUVpRxaWnd+Un5/ehX5f2J60bGhzEBQM7E90mlA/XZPLJun1cOTTBC1GbFpQA7K32OgM441QrikgPXJMXLai2OEJEVgIVwGOq+pGH4jTGNJAVcT+mqqzPzGduWjaHj5YzsGsHJgzozC8u7FvvtqOSYzlQWMo32/bT266Pm+9NBt5X1cpqy3qoaqaI9AQWiMh6Vd1ec0MRmQpMBeje3e6QMKYlWDuqnyoqreDNZXuYsWIvkaHB3DU2hZvP7EGXqAi393HBwC4kREcya20WBSXlHozWeFkmkFTtdaKz7FQmA+9UX6Cqmc7PHcBXnHi9vPp6NpmRMS3Mirgf2pCZz38WbGNrdgGXDOrC/ef1pmcjzqaDg4RJQ7tRVFrBMwvTPRCp8RErgD4ikuLMYzAZmF1zJRHpD8QAS6otixGRcOd5HDAGm8jIGJ/hVnO6iHwAvAJ8pqpVng3J1GXJ9gP8aPpKQoKEe8f3olt0ZJP2lxjThuHdY5j27U5uGNWd5Li2zRSp8aBeInIpbuajqlaIyI+BebhuMZumqmki8iiwUlWPFfTJwAxV1WqbDwBeEJEqXF/6H6veq92YllbXQFcQeINduXsm/l/gRmCbiDxWbXpR04JW7jrIlFeX0zUqgrvHNb2AH3PhaZ0JCw7i8bmbm2V/xuNyaWA+quocVe2rqr1U9a/Osj9UK+Co6iOq+lCN7Rar6umqOsT5+UpzfxhjTOO5VcRV9QtVvQkYDuwCvhCRxSJyu4iEejJA47JzfxE/mr6ShOhI/nf3WURFNt8/e4eIUO4cm8JnG7LZkl3QbPs1HlNg+WiMgQb0TheRjsDNwC3AGuAtYCwwBRjvieCMS2FpBXe8tgIR4dXbRhHbNqzO9etrbjqVO8am8Mq3O3lmYTr/ueGU/ZaMD7F8NMaAm2fiIvIhsAjXcIyXq+oVqvo/VX0AsPuTPOitpbu5+eVl7NpfxDXDE1m8/UCjinR9otuEcevZyXyyLov03MJm379pVr2wfDTG4P418ZdUdaCq/l1V9wEc67GqqiM9Fp1hzZ7DpO49zPkD4knxcKezu8amEBESzH+tp7qv22/5aIwB94v4X06xbMkplplmlHOkhI/XZZES15bx/eI9fryO7cK56YzuzFqbxe4DRR4/nmm0bqdYZvnYChWXVbIjr5DySrspyJxandfERaQLriEbI0VkGCDOWx1wNeUZD3r0441UVilXD0sgSKT+DZrB1HE9eWPpbp77ajuPXTO4RY5p3JOdnU1mZiZAkOVj61ZSXskTc7fwxtJdlFcq7SNCeOji/tx0Rg9vh2Z8TH0d2yYCt+Ea4enJassLgN96KCYDLNicw6fr93HBwM50bBfeYseNbx/BD0cl8c7yPfzk/D7Ndhubabp58+bx2muvAYRh+dhqlVVUceu05SzfeZDrRyYyMjmWWamZPPzhBgpKKugQYTcgmO/VWcRV9XXgdRG5RlVnNuYAzjSIK4FMVb2sMfsINEfLKvj9R2n0iW/HOX3iWvz4d4/rxdvL9vDiNzt45IrTWvz45tSmTJnClClTEJGdqnpe/VsYf6OqzFydwYasfJ6ePJRJzuRE1w5P5Ccz1vD43M1MPacnPTraoEzGpc5r4iJys/M0WUR+UfPh5jF+CmxqUpQB5qkvtpF5uJi/XX26V6YJTYiO5KphCbyzfA95BaUtfnxzam+++eaxp+FNyEfjw9bsPcz6zHwenNjveAEHCAoS
Hr9mMN2iIvlgTSaVVVrHXkwgqa9CHPu61w5of4pHnUQkEbgUeLkJMQaUtKx8Xvl2JzeMTmJUcqzX4rh3fC/KK6t45dudXovBnKio6HhnwyAakY/GtxWXVfLpun30iG3DPef2Oun9tuEh/P6yAeQVlLI243DLB2h8Un3N6S84P//UyP0/Bfwa+wPjlsoq5bcfrCemTSgPXTTAq7H07NSOS07vyhtLdnHPuJ5Et6l7gBnjeXffffexp/uakJPGR32bnkdxeSWXD+lGUNCpO7JOPK0L3aIiWLg5l6FJ0S3W4dX4LncHe3lCRDqISKiIfCkiedWa2mvb5jIgV1VX1bPeVBFZKSIr8/LyGhB66/L2sj38bMYa1mbkc37/zny6fp9HBnVpiPvP601RWSWvLd7l1TjMSRIbmo/GtxWWVvBd+gFOT4iqszOpiDC2TxwHispsUCYDuH+f+IWqegS4DNdYzb2BB+vZZgxwhYjsAmYAPxCRN2uuZHMQu+QXl/P5xhz6xLdjcGKUt8MBYEDXDkwY0JlXv9tl8437lg6NyEfjw77ekkt5ZRUTBnSud91B3aJoGx7Csh0HWiAy4+vcHTv92HqXAu+par7U04yjqr8BfgMgIuOBX6mqnS2cgqoyOzWTKlUmDU2gvn/blvTAD3oz6dnveHnRTn5+QV9vh2Ncjv2CuJ2PxncVl1WyfNdBhiZF06l9/beThgQHMTwpmu+27+doaQVtwt2eAqNOdbX8Bdr0nv7E3TPxT0RkMzAC+FJEOgElngsrsMxZn82m7AImDOhc7+QmLW1IUjSXnN6FlxbtILfA/st9xGHLx9Zj9Z5DlFcqZ/d2/3bSwUnRVClsyDriwciMP3B3KtKHgLOBkapaDhQBk9w9iKp+ZfeIn9rho2X8cfYGukVHcHavlr8n3B0PTuxPaUUV//nSxlT3EZk0IR+N76hSZdnOAyTFRJLQgIGVukVFENcu3HqpG7fPxAH6Az8UkVuBa4ELPRNSYPnbnE0cOlrO1cMSCa6lR6q3pcS15YbRrlHcdu63MdV9RIPzUUQuEpEtIpIuIg+d4v3bnE5yqc7jrmrvTRGRbc5jSrN+kgC2Pa+Q/YVlnNWrY4O2ExGGJEWxa38R+cXWXyWQuXUxRUTewDX9YSpQ6SxWYLpnwgoMCzfn8u7KDO4Z18vrw5vWdj3s2LWwn5zfh5mrMvnnvC08e9PwlgzNnCwF+CcNyEdn5MRngQuADGCFiMxW1Y01Vv2fqv64xraxwB+Bkc5xVjnbHmqGzxLQlu88SNuwYAZ1a3hn1iGJ0Xy5KZd1GYc5p0/gdgoOdO72iBgJDFRVGyaomewvLOXB99fSv0t7fjahDx+szvR2SHWKbx/B1HN78vSX27hp+36fbfoPEG2AMQ3Mx9FAuqruABCRGbia4GsW8VOZCMxX1YPOtvOBi4B3GhS1OUFxWSVbsgsYnRJLSHDDR2aMaxdOt+gINmTmWxEPYO7+5mwAungykECiqvz6/XUcKang6cnDiAgN9nZItXp72Z7jj9i2YcS2DeMn76RSWlFZ/8bGU4ppeD4mAHurvc5wltV0jYisE5H3RSSpgduaBti4L5+KKmVIYnSj9zGoWxR7DxVbk3oAc7eIxwEbRWSeiMw+9vBkYK3Zm0t3s2BzLr+9uD/9uvjPYHahwUFcMaQb+wtLeembHd4OJ5CF4Jl8/BhIVtXBwHzg9YZsbAM3NczajHxi24aRGNP4S2mnOc3waVn5zRWW8TPuNqc/4skgAsn6jHz+/OkmxvXtxJSzk70dToP17dyeQQlR/GdBOpcP6WazKXlHFnB3vWudKBNIqvY60Vl2nKpWHz3kZeCJatuOr7HtVzUPoKovAi8CjBw50i691SG3oITtuYWM7xffpHEhOrUPJ759OGlZR+wSV4By9xazr3GNDBXqPF8BrPZgXK3SoaIy7nlzFXFtw3jy+iE+NahLQ1x6eldCg4N4+MMNVNlsSt5QSMPzcQXQR0RSRCQM
mAyccPYuIl2rvbyC72cfnAdcKCIxIhKDqyf8vCZ/igD26bp9KDCkGUZnPK2bq5d6YWlF0wMzfsfd3uk/AqYCsbh6qScAzwPney601qWySrnuhSXkHClh6rk9mZeW4+2QGi0qMpTfXNKfhz/cwPQlu7htTIq3Qwo0ccD7NCAfVbVCRH6Mq/gGA9NUNU1EHgVWqups4CcicgVQARwEbnO2PSgif8b1RQDg0WOd3EzjzErNomtUBPEdIpq8r0EJHVi4JZdNNvBLvVrjqHTuNqffj6t36zIAVd0mIvEei6oV+ufnW0jPLeTqYQkkxrTxdjhNduPo7ny5KZe/f7aZsX3i6B3vP9f2W4F4oC8NzEdVnQPMqbHsD9WeHx8q+RTbTgOmNSFm49h9oIjUvYe56LTm6SvcpUMEsW3D2GDXxQOSux3bSlW17NgLEQnBdb+occPMVRk899V2RifHMtKLc4Q3p3eW7+WMlFiCg4Rbpy1n+pJdx3uxG4+rsnz0X7NTswCabaIjEeG0bh3YnldovdQDkLtF/GsR+S0QKSIXAO/h6slq6rF850Ee+mAdY3p35PIh3bwdTrNqHxHK1cMSyDpcwnw/vjzghwotH/2TqjJrbRajU2KJbtN88yQM6hZFlcKXm3wjD1P3HublRTvYllNAlQ0v4lHuFvGHgDxgPa5esXOA33kqqNZi1/4i7n5jJUmxbfjvjSN8dljVphjYLYozUmJZlL6f9ZnWnNdCMrB89Eub9hWQnlvIpKHN+4U+ISaSDhEhfLYhu1n321Cqyt/mbOLKZ7/jL59u4tXFrha68soqr8bVmrl1TVxVq0TkI+AjVbUbQOvx9rI9FJdV8tzX2ykpr+KqoQl8un6ft8PymEsHdyXrcDEzV2UQ78ZUiqZZfITlo9+ZtTaTkCDhkkFdm7XgBolwWrcovtmaR1FpBW2baXrShnru6+28+M0ObjyjOz+b0Ic/zkpj7oZsZqdmcc2IRK/E1NrVeSYuLo+IyH5gC7DFmSDhD3Vt54+OlJSzbOcBvt2WR+bh4ibtq6yiiulLd3GoqIybz+xBx3atu7CFBAVx4xk9CAsJ4q1lu+26nIeoKo888gjAEFp5PrZGVVXKx6lZnNu3EzEemHL4tIQOlFZU8dUW73yvW5+Rz//7fCuXD+nGX68cRHz7CM7p04lx/Tqxas8hNu2z3vOeUF9z+s+BMcAoVY1V1VjgDGCMiPzc49G1kA2Z+fxr/lZmpWYxZ0M2zy5M582luxt132V5ZRUzVuxhz4GjXD8qiZS4wBgMJSoylBtGd+dgURlTp6+kpNyGZW1u//rXv/juu+8ANrXmfGytVu4+RFZ+SbM3pR+T3LEtHduG8dmGlm/1U1V+N2sDHduG8ZdJg04YA+P8/p2Jbx/Op+v3UWHN6s2uviJ+C3CDqu48tsCZQOFm4FZPBtZSvt22nxkr9hDfPpyfnt+H314ygAsHdmZrTgHPfZVOem6h2/uqqlIemrmezdkFXD6kG6cnNE/vU3+REteWa0cksWznQX42I5VKGwimWb3xxhu88847AMd7pre2fGzNZqVmEhkazIQBnT2y/yARLjytMws357b4l+h5adms3XuYX13Yj6g2oSe8FxwkXHJ6Vw4WlbFqj01819zqK+Khqrq/5kLnOlzoKdb3K0dKyvnFu6nEtQvn9jEpdO4QQbvwEMb3i+dH5/SkrFK55rnFLNtxoN59VVRW8av31jJzdQbnD4jnzJ4Nmx+4tRiaFM3vLxvI3LRsfj9rAzbxXfMpLy8nLu7koTVbSz62ZmUVVXy6fh8XDOzs0evVFw/qSlFZJV9uyvXYMWqqqKziH/O20KtTW64efup5cfrEtyMxJpJF2/bbl/tmVl8RL2vke37h6S+2sb+wlGtHJJ40k1hSbBvuHdeLju3CuOWV5Xy8NqvW/RwpKefet1bzwZpMfnlBX37QL7DHwblzbAr3ju/F28v28PBHNjRrcwkLq/M6qt/nY2v2zdY8Dh8t
5woP32Y6pnccCdGRvL18t0ePU90HazLZnlfEgxP71Tqlqogwrm8nDhaVscHuYmlW9RXxISJy5BSPAuD0lgjQU3KOlPDG0t1cMzyx1hHUYtuG8cG9ZzM4MYoH3lnDnz5Oo6DkxE5bS3cc4JKnF7Fgcy5/uuI0Hji/j9+Oid6cfj2xH/c5hfwX76batbBmsHbtWjp06AAwrLXlY2s3c3UGHduGMa6fZ+f9Dg4SbhidxHfpB9iWU+DRYwGUlFfy1PytDEmMYmI9I9AN6NqBTu3D+XprnrXQNaM623VU1Xcnum6il77ZQWWV8pPz+7Bo20lXDI6LbhPGm3edwd/mbOLV73Yxc1UG4/vFE90mlLV7D7M2I58eHdvw3j1nMbx7TAt+At8mIvz6ov60DQ/hH/O2UFjqmju9OZsSaxsdzl/HQK5PZaXrOqeIrFHVkV4Ox7jpUFEZX27K5eYzexBay5lqc7phdHeeXbid577azpM/HOrRY72xZDdZ+SX887r6J3QKEmFcn068vzqDrQ38glFQUs4zC9P5PC2HkvJKRqfEMjo51k6YcH/s9AYTkSRgOtAZ15CQL6rq0546XkMUlVbwv5V7ueT0riTF1j+OeURoMI9OGsQ1wxN59budLNt5kMNHy4lrF8Ylp3dldHIsm/cVsHmf57/5+oPqxTWmTRhXDOnGx2uzuOa5xbx060i3/s2NaS0+XpdFWWUV17bQfdId24Vz0xndeXXxLu4e14t+XTwzr8HBojL+vWAb4/p24uze7k2DOjgpivmbcvimjhOnmvIKSrnllWVsySngvH7xbM0pYFZqFtn5JVwxpFvAF3JPjghQAfxSVVeLSHtglYjMV9WNHjymWz5KzaSgpILbGjif95CkaJ6aPAyoezYcc6Ize3akY9sw3lmxh4lPfcONo7vTs1O7E9ZprWfPxsxclcGArh0Y2K1Dix3zvvN6M3N1Bg9/uJ4ZU8885bXq/OJyPlqTyfKdBzlSUk5JeRWDunWgd3w7twrj019s5WhZJb+7dIDbcYUEBTGmdxxz1u9jzZ5DDKun9bK8sop731zF7gNHmX7HaM7p04k3l+5m7oZsvk3fT+cOEQHbifgYj7XtqOo+VV3tPC/ANTfxqbsutrBZa7LoE9+O4d2jvR1KwOjTuT33jetNm7AQXvl2J/M3ZlsvVdPqpWXlszYjv8XOwo+JbRvGHy4fyMrdh/j9rLQTci3j0FH+/MlGzv77l/xxdhrrMg9zpKSC9ZmHeXXxLl5ctIMDhaV17n9rTgFvLtvDDaOT6NO5YWf6o5JjiAwN5vmvt9e77t/nbGbl7kM8ce1gzunj6k8QJMJFg7rQt3M7Ptuwj0NFgd2ns0XG5hORZGAYztSJ3pR5uJjluw7yqwv7BnwzTEuLax/O/ef14uO1+1i4JY/teUVcOyKRuGYY0a5KlfKKKkJDggLuWrnxXa8v3kVkaHCLF3GAq4YlsiW7kOe/3s66jMOM7RPHjrwiFmzORYDLBnflrnN6MsgZz2L64l2s2XOYuWnZ/GdBOpcP6crw7jEn/Z0sKa/kZzNSiYoM5ecT+jY4rvCQYM7q1ZF5aTlsyMw/fvyaPlmXxbTvdnL7mOSTJo8KEuHKoQk89cU2PkvL5sbRgZvbHi/iItIOmAn8TFVPGndPRKYCUwG6d/f8f8SxW8WuGOITjQIBJzzE9QetT+d2zErN5Okvt3FunziuGpZAZJj7/ShVlYxDR1mfmc/23EJyC0qpcM42oiJD6d+lPWf27EjnDhGe+ih+RUQuAp4GgoGXVfWxGu//ArgL12WwPOAOVd3tvFeJa7IVgD2qekWLBe7HDhaV8cHqTIb3iOHTdd6ZO+Ghi/vTr0s7Xvh6B69+u4v4DuHcfnYyd4xNoVt05AnrhgQHMSollj6d2/Heqgxmrs5kS04hVw39/m9laUUlP52xho37jvDSrSMbPaT02N5xrNlziL/N2cRbd51x0heF9NwCfv3+Okb0iOE3F5+6uT66
TRhjendk4ZY8sg4Xn/R5AoVHi7iIhOIq4G+p6genWkdVXwReBBg5cqTH21dnpWYxrHs03Tta5ypvGpIYTUpcW+ZuyGbhljwmPPk194zrybUjkuos5um5hcxKzWRWahZ7Dh4lSFwjxZ3ZsyPtwkMoq6wi50gJq3YfYtnOg5yeEMVlg7vSPiJwx0IRkWDgWeACXDOgrRCR2TX6p6wBRqrqURG5F3gC+KHzXrGqDm3JmFuDGSv2UFGlnOXla7ZXDUvkqmHutwREtwnjzrEpLNqax/xNOWzPLST7SDGxbcP5aE0mW3IK+MNlA7lgYONHnosIDeYXF/bj9x9t4N2Ve/nhqO9P4PKLy7n7jVW0CQvm2RuHExZS+1Xfsb07sWTHAb7clMMtZyU3Oh5/5sne6QK8gmuc5yc9dZyG2JpTwKZ9R3jk8oHeDsUAHSJCuX5kEiOTY1ixy3Xt7ol5WxjfL54ze8bSLTqSsOAg9heWsiEzn8XbD5CWdYQgcQ1qMSo5hgFdO9Am7ORf46LSCpbsOMDXW/NIzy30SnOmDxkNpDtDtCIiM4BJwPEirqoLq62/FNdQrqaRSisqmb54N706tfXL1qAgEcb1i6dP5/Z8sSmH1xfvpqyyij7x7Xj51pFMaEIBP+am0d35dF0Wf5iVRmJMG8b0jiO3oISp01ex92Ax0+8cTZeouv/tIsOCGdu7E19symHvwaMBeeeLJ8/Ex+Aae329iKQ6y36rqnM8eMw6zU7NIkjg0sGeHTXJNEzPuHY8fMkAVuw6xHsr97JwS+5JI+SFhQQxNDGa3106gCuGdCO+Q0Sddwi0DQ9hwoDODE6M4t2Ve3lz6W66RUdyz7iegdgXIgHYW+11Bq6JU2pzJ/BZtdcRIrISV1P7Y6r6UbNH2MrMWL6X7CMl3D4m2duhNEm36EhuPSuZq4cnUFZZRfvwkGbLn6Ag4b83jeD6F5Zw8yvLGJYUzdacQsorq/j3DcPc7nU+pldHvkvfz4LNuUxp4B1HrYHHiriqfgv4zF9LVWXW2kzG9I6jk8157XNExDWAQ0osVVXKviMlZOcXU1GpxLYNIym2zUlD47ojvn0EU8/pxczVGTw+dzM5R0r44+UDA7GQu0VEbgZGAuOqLe6hqpki0hNYICLrVfWkrsUt3b/FVxWXVfLMwnTOSImld41bKf1VRGhwo/KvPrFtw/jwvrP571fbWbX7EBcP6sLUc3s2qMd7eGgw5/SJ4/ONrrPxQOOdmeO9YM3ew+w9WMxPz294b0rTsoKChIToSNcY0Mv2sD2viBW7Gj/7UVhIEJNHJTE6JZZXvt1JaUUVf71yEEFBAVPIM4Gkaq8TnWUnEJEJwMPAOFU9fo+RqmY6P3eIyFe47jQ5qYi3dP8WXzV9yS7yCkp59sbhDZoFMVC1jwjl/y7q36R9nNWzI4u2uc7G/+/ipu3L3wRMEZ+dmkVYSBATT/PMNIDGt4kIPePaMr5vJ95ZvofN+45wzYhEgpwz8lZ++9kKoI+IpOAq3pOBG6uvICLDgBeAi1Q1t9ryGOCoqpaKSByuy2RPtFjkfiY7v4RnFqQzrm8nRqfEWhFvIdXPxlP3HmZoUrS3Q2oxAVHEKyqr+GRdFhMGxDe4l7KNzNZ6iAgXntaFkOAgvtiUgwLXVivkrZWqVojIj4F5uG4xm6aqaSLyKLBSVWcD/wDaAe85lxqO3Uo2AHhBRKpwDQ71mC+MuuiLVJXffbSB8qoqHp10mrfDCTjHzsaf/mIrr94+2tvhtJiAKOKLtx9gf2GZ3Rvuw1ryy9IP+scTJPD5xhxUletGJtW/kZ9zOpTOqbHsD9WeT6hlu8XYDGlu+WTdPr7YlMPDlwygR8e23g4n4FQ/G1++8yCjU2Ld2q60vNLDkXlWQBTxWalZtI8IYbyHpwE0/mO8M+f75xtzAJg8KqnWuZCNqc/WnAIemrmOoUnRft8j3Z+d3SuOtKwj/P6jDXz8wNha7zGvUmXNnkMs3n6AffklAHyY
mskvL+zHuL7+VSda/V+tkvJK5qVlc/GgLh7pXWn81/h+8Uwc2Jm1Gfn84t21Nue5aZRDRWXc9fpK2oSH8NzNw+3LoBeFhQTxpytOY0tOAY99tvmU6+QWlPDSNzuYuToTASYM6MyEAfEcKS5nyrTlPLNgW8sG3USt/kx8Xlo2haUVXDnUmtLNycb1iwcRZq/NQoF/XT/E/ggbtx0qKuOWacvIzi9hxt1n0jUqMIf+9CUTBnbmtrOTmfbdTqLbhHL/eb0JDhKKyyr5PC2bRen7CQsO4toRiQxLij5+u+mzNw3nNzPX88/PtxIRGsxd5/T08idxT6sv4jNXZ5IQHRnw09WZ2o3r24lh3aOPf3O3Qm7ckVtQwq2vLGfH/iJeuHUEw+uZVtO0nN9dOoD84nKenL+Vj9ZkkhLXllV7DnH4aDnDkqK5aFCXkzo5h4cE84/rhlBcXsnf5mxiSFI0o5Ldu67uTa26iOccKeHbbXncN753IN0TbBrhnnG9AHjss82UV1Tx9A1DCQ+xyy/m1FL3HubuN1ZypLiCaVNGMbZPnLdDAurvINrKb6U8LiQ4iCevH8L4fp14f1UGmYeLGde3E53bR5AcV3unw+Ag4R/XDSEt6wg/m5HKnJ+eQ1Skb8+70KqL+IdrMqlSuHq4NaWb+t0zrhdhwUE8+slG7nhtBS/cMpJ24a06RYybjhXHKlW+3baf+Zty6BARwl3npPhMATcnEhEmDU1gUrVLqe7cBdMuPISnJg/luueX8MjsNP71w6EejLLpWm2boary/qoMhnePpmcrGfrQeN4dY1P4f9cNYemOg9z08jIOFZV5OyTjI/IKSnnxmx3MTcumX+f23De+t10Db6WGd4/h/vN68+GaTBZszvF2OHVqtacZ36bvJz23kH9cO9jboRg/c82IRDpEhnL/26uZ+NQ33D4m5ZRNaoHSNBnoKquUb9P383laNqHBQVw/MokhiVEBNf5+IA56df95vZizfh+/+3ADn/+io8+2yvlmVM3g5UU7iWsXzhVDbcYy03AXDOzM67eP5rZXl/PCN9u5Y0wKce1s4pxAs2t/EQ++v5YVuw7Rv0t7rhyWQIcWmJs+EIumrwkPCebxawZz7fOL+cfczfxp0iBvh3RKrbKIb8sp4Outefzygr7WOck02lm9OnLX2J68ungnL3yzgyln9SAxJvDmKw5EVVXK60t28fjczYQFB3HdiESGVrsdqTp/Krj+FKsvGNEjhilnJfP6kl1cMbQbI3r4Xm/1VlnEX160k/CQIG46s4e3QzF+orY/bgkxkUw9tyevLd7Fy4t2csPoJPp16dDC0RlPq/7/f7CojPdXZbDrQBH9OrvOvn29h7LxnAcn9mP+xhz+b+Z6Pv3JWJ87MWx1Hdt27i/i/dUZ/HBUErFtw7wdjmkF4ttHcO+4XsS1D+ONpbtZsfOgt0MyHlClytIdB/j3l9vYl1/MNcMTufWsHlbAA1zb8BD+etUg0nMLeXZBurfDOUmrK+KPfbaJ8JAgHvhBH2+HYlqR9hGh/OicnvSOb8eHqZnMS8umsipgp8xudXILSnh50U5mr82iR8c2/PT8PozoERNQnddM7cb3i+eqYQn896vtpGXlezucE7Sq5vT5G3OYl5bDry/qR6f21gnJNK/wkGBuOTOZ2Wuz+HprHre/toJ/Tx5KdBtr8fFXpRWVPP/VDv6zIJ2w4CCuHpZgxdsH+OK1+99fNpDF2/dz9xurmP3jsT7T0ttqinh2fgn/N3Md/bu050d+Muat8T/BQcJVwxJIjI7k0/X7uOw/3/LsjcMZkhTt7dBMA6gqX23J469zNpGeW8jgxCguPb3rSUNxGs/xxUJdl9i2Ybxwy0iuf2EJ9721iul3nFHrLGktyfsRNIOCknLufH0FJeWVPHPjcEJt3GvjYaNSYnn3nrOoqlKufm4xT8zdTImfz0scKNbuPcyNLy3j9tdWUFFZxau3jWLyqO5WwE29hiZF89jVp7N0x0F+
NH0lR8sqvB2SZ4u4iFwkIltEJF1EHvLEMbLzS7jhpaVsyS7g2RuH0zveRmczLWNoUjRzf34u1wx3XSv7wT+/4v1VGZT76JSm9eWjiISLyP+c95eJSHK1937jLN8iIhNbNPBmUFFZxdwN+7j++SVMevY7Nmcf4ZHLB/L5z8dxXv94b4dn/MjVwxN5/JrTWbQtjxtfWsau/UVejcdjzekiEgw8C1wAZAArRGS2qm5sjv0XlVbw/qoMnpy/lfLKKl68dYQlo2lxHSJCeeLaIVw9PJG/zdnEr95byz/nbeHGM7pzyeld6NWpnU9cX3UzH+8EDqlqbxGZDDwO/FBEBgKTgdOAbsAXItJXVX266eHw0TJW7DrE52nZLNicy4GiMhJjIvndpQO4flRSiwzaYlpeSzTT/3BUd6Iiw3jwvbVMfOob7hnXi5vO7E58+wiPH7smT14THw2kq+oOABGZAUwCGlXEK6uUVbsPsSEzn9V7DvHlplyKyys5s2csf796MCl1zExjjKed2bMjH903hgWbc3l9yS6enL+VJ+dvJTEmkqFJ0Qzo2oHEmEgSoiOJaRtG27AQ2oQH0yY0mOAgaYlC704+TgIecZ6/DzwjrsAmATNUtRTYKSLpzv6WeCJQVUXVdctXlfPz+9dKeaVytKyC4rJKjpZVUlRWwf7CMnLyS8g+UkLmoWLS9uWz92AxAO0jQjivXzyXDu7KhAGdCbYZDU0zuGhQF4Z1j+aR2Wk8/eU2nl2Yzlm9OjIqOZb+XdqTEBNJ99g2Hr9M48kingDsrfY6AzijKTu87dXlHC2rpFP7cK4clsDVwxMYaT1JjY8IChImDOzMhIGd2ZdfzBcbc1i8/QCrdh/ik3X76t5WXJ3mgkRI+9NET8xn7k4+Hl9HVStEJB/o6CxfWmPbJk0N+PKiHTw5f+vxQq01CnZjhQYLUZGhdImKZODADiTGtqFHxzaEBAVxoLCM/63YW/9OjHFT5w4RPHfzCLbnFfK/FXv5ekse//pi6/Hf4fvP68WDE/t7NAav904XkanAVOdloYhsqW+b3cBK4LHmCyMO2N98u/Moi9UzGhzrTR4KJPSv9a7Sz0OHbpLG5HIT+Mvvlr/ECf4Tq0fi9EQ+//px4n5df6xNGlrUk0U8E0iq9jrRWXYCVX0ReNGDcdRLRFaq6khvxuAui9Uz/C3WRmzmTj4eWydDREKAKOCAm9u2aC77y/+Xv8QJ/hOrv8QJLROrJ3unrwD6iEiKiITh6hgz24PHM8bUzp18nA1McZ5fCyxQVXWWT3Z6r6cAfYDlLRS3MaYOHjsTd66p/RiYBwQD01Q1zVPHM8bUrrZ8FJFHgZWqOht4BXjD6bh2EFehx1nvXVyd4CqA+329Z7oxgcKj18RVdQ4wx5PHaCZebc5vIIvVM1p9rKfKR1X9Q7XnJcB1tWz7V6D+q/Utx1/+v/wlTvCfWP0lTmiBWEWb0hXUGGOMMV5j45MaY4wxfirgi3hLDA3rHGeaiOSKyIZqy2JFZL6IbHN+xjjLRUT+7cS0TkSGV9tmirP+NhGZUm35CBFZ72zzb2eQjlqPUU+sSSKyUEQ2ikiaiPzUV+MVkQgRWS4ia51Y/+QsT3GGDk0X11CiYc7yBg8tWtvvSG3HqCfeYBFZIyKf+HKcvqql8tU5ll/krL/kq7/lqrOd7+era3SkwHzg6uCzHegJhAFrgYEeOta5wHBgQ7VlTwAPOc8fAh53nl8CfAYIcCawzFkeC+xwfsY4z2Oc95Y764qz7cV1HaOeWLsCw53n7YGtwEBfjNfZvp3zPBRY5uz3XWCys/x54F7n+X3A887zycD/nOcDnf//cCDF+b0Irut3pLZj1BPvL4C3gU/q2oe34/TFR12fMZBzFj/JV/wsV/0lX72emN58AGcB86q9/g3wGw8eL5kT/yBsAbo6z7sCW5znLwA31FwPuAF4odryF5xlXYHN1ZYfX6+2YzQw7lm4xtz26XiBNsBqXCOR7QdCav4/
4+qdfZbzPMRZT2r+3x9br7bfEWebUx6jjvgSgS+BHwCf1LUPb8bpq4/aPqOHj5mMn+UsfpCv+HiuOuv5Rb4GenP6qYaibNJwkg3UWVWPjceZDXSuJ666lmecYnldx3CL0yw0DNe3Zp+M12nySgVygfm4vuEeVtVj8wRW3/8JQ4sC1YcWbchn6FjHMWrzFPBr4Ng0Z3Xtw5tx+ipv5yv4aA4c4+v56ke5Cn6Sr4FexH2Gur52qS8dQ0TaATOBn6nqkabsqzHcPYaqVqrqUFzfnEcDnh2suBFE5DIgV1VXeTsW0zx8KQfAP/LVH3IV/CtfA72IuzWcpAfliEhXAOdnbj1x1bU88RTL6zpGnUQkFNcfhLdU9QNfjxdAVQ8DC3E1QUWLa+jQmvs/HpO4N7RobcsP1HGMUxkDXCEiu4AZuJronvbBOH2Zt/MVfDQH/C1ffTxXwZ/ytSHXWlrbA9e1ix24Ohwc61xwmgePl8yJ19f+wYmdQp5wnl/KiR1PljvLY4GduDqdxDjPY533anY8uaSuY9QTpwDTgadqLPe5eIFOQLTzPBJYBFwGvMeJnUPuc57fz4kdUN51np/GiR1QduDqfFLr70htx3Dj33c833eU8dk4fe1R12cM5JzFT/IVP8xVf8hXryemtx+4empuxXVt5mEPHucdYB9Qjus6x524rn98CWwDvqiWMAI868S0HhhZbT93AOnO4/Zqy0cCG5xtnuH7gXxOeYx6Yh2Lq1lsHZDqPC7xxXiBwcAaJ9YNwB+c5T1x/eFJd5Ii3Fke4bxOd97vWW1fDzvxbMHpfVvX70htx3Dj33c83/9R8Nk4ffFR22cM5JzFT/IVP8xVf8hXG7HNGGOM8VOBfk3cGGOM8VtWxI0xxhg/ZUXcGGOM8VNWxI0xxhg/ZUXcGGOM8VNWxAOMiFSKSKozi9BaEfmliAQ5740/NluP8/ovIjJXRHaKyOnVlj8oIi+ISLKIFDv7O/a41Rufy5hA0cgcDheRr5xZs47l6rU19nfskezsJ7/G8gne+symdiH1r2JamWJ1DXuIiMTjmqGnA/DH6iuJyO9wjVp0CTAO+K+InAt0A+7Bdd9oFLD92P6MMS2iwTmsqqXimj30JlVdWdv+qm2bDCxS1cs88QFM87Ez8QCmqrnAVODH4mQ4gIj8ErgYuFxVi1V1Lq5BL24F/gU8oqqHvBGzMeZ77uawt+Iznmdn4gFOVXeISDAQ7ywaA/QDRqhqYbVVf4ZrFKFtqvpGteW9nFmJjnlAVRd5MGRjTDUNyGGAt0TkWFE/X1UPAJHVcninql7lPD+nRm5fo6rbm/8TmKawIm5qSsc1bvIFuCZUAEBVs0RkAa55dauz5nRjfMspc9jhVnO6w5rT/YA1pwc4EekJVPL97EM5uK6DPyUi59VYvYrv59Y1xviABuawaWWsiAcwEemEa5acZ7TaIPqquhW4GnhTRIZ6KTxjTD0sh401pweeY9e/QoEK4A3gyZorqeoKEbkdmC0i59VxLazmNfFpqvrvZo7ZGPO9RuVwI45T85r4X1T1/Ubsx3iQzWJmjDHG+ClrTjfGGGP8lBVxY4wxxk9ZETfGGGP8lBVxY4wxxk9ZETfGGGP8lBVxY4wxxk9ZETfGGGP8lBVxY4wxxk/9f3vqzwZINMmAAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 576x144 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def get_daikuanYE(df,col):\n",
    "    df[col + '_genFeat1'] = (df[col] > 100000).astype(int)\n",
    "    df[col + '_genFeat2'] = (df[col] > 120000).astype(int)\n",
    "    df[col + '_genFeat3'] = (df[col] > 140000).astype(int)\n",
    "    df[col + '_genFeat4'] = (df[col] > 180000).astype(int)\n",
    "    df[col + '_genFeat5'] = (df[col] > 220000).astype(int)\n",
    "    df[col + '_genFeat6'] = (df[col] > 260000).astype(int)\n",
    "    df[col + '_genFeat7'] = (df[col] > 300000).astype(int)\n",
    "    return df, [col + f'_genFeat{i}' for i in range(1, 8)]\n",
    "\n",
    "df, genFeats2 = get_daikuanYE(df, col = 'DKYE')\n",
    "df, genFeats3 = get_daikuanYE(df, col = 'DKFFE')\n",
    "\n",
    "\n",
    "plt.figure(figsize = (8, 2))\n",
    "plt.subplot(1,2,1)\n",
    "sns.distplot(df['DKYE'][df['label'] == 1])\n",
    "plt.subplot(1,2,2)\n",
    "sns.distplot(df['DKFFE'][df['label'] == 1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 6/6 [00:00<00:00, 21.42it/s]\n",
      "100%|██████████| 15/15 [00:00<00:00, 130.88it/s]\n",
      "100%|██████████| 6/6 [00:02<00:00,  2.14it/s]\n",
      "100%|██████████| 23/23 [00:32<00:00,  1.43s/it]\n",
      " 17%|█▋        | 4/23 [00:10<00:49,  2.58s/it]"
     ]
    }
   ],
   "source": [
    "for f in tqdm(cate_cols):\n",
    "    df[f] = df[f].map(dict(zip(df[f].unique(), range(df[f].nunique()))))\n",
    "    df[f + '_count'] = df[f].map(df[f].value_counts())\n",
    "    df = pd.concat([df,pd.get_dummies(df[f],prefix=f\"{f}\")],axis=1)\n",
    "    \n",
    "    \n",
    "cate_cols_combine = [[cate_cols[i], cate_cols[j]] for i in range(len(cate_cols)) \\\n",
    "                     for j in range(i + 1, len(cate_cols))]\n",
    "\n",
    "\n",
    "for f1, f2 in tqdm(cate_cols_combine):\n",
    "    df['{}_{}_count'.format(f1, f2)] = df.groupby([f1, f2])['id'].transform('count')\n",
    "    df['{}_in_{}_prop'.format(f1, f2)] = df['{}_{}_count'.format(f1, f2)] / df[f2 + '_count']\n",
    "    df['{}_in_{}_prop'.format(f2, f1)] = df['{}_{}_count'.format(f1, f2)] / df[f1 + '_count']\n",
    "\n",
    "    \n",
    "for f1 in tqdm(cate_cols):\n",
    "    g = df.groupby(f1)\n",
    "    for f2 in num_cols + gen_feats:\n",
    "        for stat in ['sum', 'mean', 'std', 'max', 'min', 'std']:\n",
    "            df['{}_{}_{}'.format(f1, f2, stat)] = g[f2].transform(stat)\n",
    "    for f3 in genFeats2 + genFeats3:\n",
    "        for stat in ['sum', 'mean']:\n",
    "            df['{}_{}_{}'.format(f1, f2, stat)] = g[f2].transform(stat)\n",
    "\n",
    "num_cols_gen_feats = num_cols + gen_feats\n",
    "for f1 in tqdm(num_cols_gen_feats):\n",
    "    g = df.groupby(f1)\n",
    "    for f2 in num_cols_gen_feats:\n",
    "        if f1 != f2:\n",
    "            for stat in ['sum', 'mean', 'std', 'max', 'min', 'std']:\n",
    "                df['{}_{}_{}'.format(f1, f2, stat)] = g[f2].transform(stat)\n",
    "\n",
    "for i in tqdm(range(len(num_cols_gen_feats))):\n",
    "    for j in range(i + 1, len(num_cols_gen_feats)):\n",
    "        df[f'numsOf_{num_cols_gen_feats[i]}_{num_cols_gen_feats[j]}_add'] = df[num_cols_gen_feats[i]] + df[num_cols_gen_feats[j]]\n",
    "        df[f'numsOf_{num_cols_gen_feats[i]}_{num_cols_gen_feats[j]}_diff'] = df[num_cols_gen_feats[i]] - df[num_cols_gen_feats[j]]\n",
    "        df[f'numsOf_{num_cols_gen_feats[i]}_{num_cols_gen_feats[j]}_multi'] = df[num_cols_gen_feats[i]] * df[num_cols_gen_feats[j]]\n",
    "        df[f'numsOf_{num_cols_gen_feats[i]}_{num_cols_gen_feats[j]}_div'] = df[num_cols_gen_feats[i]] / (df[num_cols_gen_feats[j]] + 0.0000000001)\n",
    "    \n",
    "            "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "训练集、测试集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_df = df[df['label'].isna() == False].reset_index(drop=True)\n",
    "test_df = df[df['label'].isna() == True].reset_index(drop=True)\n",
    "display(train_df.shape, test_df.shape)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "drop_feats = [f for f in train_df.columns if train_df[f].nunique() == 1 or train_df[f].nunique() == 0]\n",
    "len(drop_feats), drop_feats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cols = [col for col in train_df.columns if col not in ['id', 'label'] + drop_feats]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import StratifiedKFold\n",
    "from lightgbm.sklearn import LGBMClassifier\n",
    "from sklearn.metrics import f1_score, roc_auc_score\n",
    "from sklearn.ensemble import RandomForestClassifier,VotingClassifier\n",
    "from xgboost import XGBClassifier\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "oof = np.zeros(train_df.shape[0])\n",
    "# feat_imp_df = pd.DataFrame({'feat': cols, 'imp': 0})\n",
    "test_df['prob'] = 0\n",
    "clf = LGBMClassifier(\n",
    "    learning_rate=0.05,\n",
    "    n_estimators=10230,\n",
    "    num_leaves=31,\n",
    "    subsample=0.8,\n",
    "    colsample_bytree=0.8,\n",
    "    random_state=1023,\n",
    "    metric=None\n",
    ")\n",
    "\n",
    "val_aucs = []\n",
    "seeds = [1023, 2048, 2098]\n",
    "for seed in seeds:\n",
    "    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)\n",
    "    for i, (trn_idx, val_idx) in enumerate(skf.split(train_df, train_df['label'])):\n",
    "        print('--------------------- {} fold ---------------------'.format(i))\n",
    "        t = time.time()\n",
    "        trn_x, trn_y = train_df[cols].iloc[trn_idx].reset_index(drop=True), train_df['label'].values[trn_idx]\n",
    "        val_x, val_y = train_df[cols].iloc[val_idx].reset_index(drop=True), train_df['label'].values[val_idx]\n",
    "        clf.fit(\n",
    "            trn_x, trn_y,\n",
    "            eval_set=[(val_x, val_y)],\n",
    "    #         categorical_feature=cate_cols,\n",
    "            eval_metric='auc',\n",
    "            early_stopping_rounds=200,\n",
    "            verbose=200\n",
    "        )\n",
    "    #     feat_imp_df['imp'] += clf.feature_importances_ / skf.n_splits\n",
    "        oof[val_idx] = clf.predict_proba(val_x)[:, 1]\n",
    "        test_df['prob'] += clf.predict_proba(test_df[cols])[:, 1] / skf.n_splits / len(seeds)\n",
    "\n",
    "    cv_auc = roc_auc_score(train_df['label'], oof)\n",
    "    val_aucs.append(cv_auc)\n",
    "    print('\\ncv_auc: ', cv_auc)\n",
    "print(val_aucs, np.mean(val_aucs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(val_aucs, np.mean(val_aucs))\n",
    "def tpr_weight_funtion(y_true,y_predict):\n",
    "    d = pd.DataFrame()\n",
    "    d['prob'] = list(y_predict)\n",
    "    d['y'] = list(y_true)\n",
    "    d = d.sort_values(['prob'], ascending=[0])\n",
    "    y = d.y\n",
    "    PosAll = pd.Series(y).value_counts()[1]\n",
    "    NegAll = pd.Series(y).value_counts()[0]\n",
    "    pCumsum = d['y'].cumsum()\n",
    "    nCumsum = np.arange(len(y)) - pCumsum + 1\n",
    "    pCumsumPer = pCumsum / PosAll\n",
    "    nCumsumPer = nCumsum / NegAll\n",
    "    TR1 = pCumsumPer[abs(nCumsumPer-0.001).idxmin()]\n",
    "    TR2 = pCumsumPer[abs(nCumsumPer-0.005).idxmin()]\n",
    "    TR3 = pCumsumPer[abs(nCumsumPer-0.01).idxmin()]\n",
    "    \n",
    "    return 0.4 * TR1 + 0.3 * TR2 + 0.3 * TR3\n",
    "\n",
    "tpr = round(tpr_weight_funtion(train_df['label'], oof), 6)\n",
    "tpr, round(np.mean(val_aucs), 5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "submit.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "submit['id'] = test_df['id']\n",
    "submit['label'] = test_df['prob']\n",
    "\n",
    "submit.to_csv('../sub/submission{}_{}.csv'.format(tpr, round(np.mean(val_aucs), 6)), index = False)\n",
    "submit.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
